# Import required libraries
install.packages("GGally")
install.packages("matrixStats")
install.packages("readxl")
install.packages("lubridate")
install.packages("writexl")
library(readxl)
library(forecast)
library(data.table)
library(lubridate)
library(dplyr)
library(tidyr)
library(GGally)
library(dplyr)
library(writexl)
library(matrixStats)
The downloaded binary packages are in /var/folders/tm/h12dzwn519q9gwvz38d0m4_80000gn/T//RtmpqbESn6/downloaded_packages The downloaded binary packages are in /var/folders/tm/h12dzwn519q9gwvz38d0m4_80000gn/T//RtmpqbESn6/downloaded_packages The downloaded binary packages are in /var/folders/tm/h12dzwn519q9gwvz38d0m4_80000gn/T//RtmpqbESn6/downloaded_packages The downloaded binary packages are in /var/folders/tm/h12dzwn519q9gwvz38d0m4_80000gn/T//RtmpqbESn6/downloaded_packages The downloaded binary packages are in /var/folders/tm/h12dzwn519q9gwvz38d0m4_80000gn/T//RtmpqbESn6/downloaded_packages
tday=today("Turkey")
day_before_tday <- tday - 1
#day_before_tday <- tday - 2
prediction_day <- tday +1
start_date <- as.Date("2024-02-01")
end_date <- as.Date("2024-05-15")
file_weather = paste0("/Users/ecemozturk/Desktop/processed_weather.csv")
file_production = paste0("/Users/ecemozturk/Desktop/production-4.csv")
weather_data = fread(file_weather)
production_data = fread(file_production)
# getting full weather date and hours as a template
template_dt = unique(weather_data[,list(date,hour)])
template_dt = merge(template_dt,production_data,by=c('date','hour'),all.x=T)
#template_dt = template_dt[date<=(tday + 1)]
template_dt = template_dt[date<=(tday)]
###NA VALUES###
any_na <- anyNA(weather_data)
if (any_na) {
cat("The dataset contains NA values.\n")
# Display the count of NAs per column
print(colSums(is.na(weather_data)))
} else {
cat("The dataset does not contain any NA values.\n")
}
# Display all rows that have NA values
na_rows <- weather_data[!complete.cases(weather_data), ]
View(na_rows)
# Fill NA values with the average of the surrounding values (linear interpolation)
merged_data_filled <- weather_data %>%
mutate(across(where(is.numeric), ~ na.approx(.x, na.rm = FALSE)))
# Fill leading NAs with the next available value, upward
merged_data_filled <- merged_data_filled %>%
mutate(across(where(is.numeric), ~ na.locf(.x, fromLast = TRUE, na.rm = FALSE)))
weather_data<- merged_data_filled
The dataset contains NA values.
date hour lat
0 0 0
lon dswrf_surface tcdc_low.cloud.layer
0 25 50
tcdc_middle.cloud.layer tcdc_high.cloud.layer tcdc_entire.atmosphere
50 100 63
uswrf_top_of_atmosphere csnow_surface dlwrf_surface
75 25 25
uswrf_surface tmp_surface
50 25
| date | hour | lat | lon | dswrf_surface | tcdc_low.cloud.layer | tcdc_middle.cloud.layer | tcdc_high.cloud.layer | tcdc_entire.atmosphere | uswrf_top_of_atmosphere | csnow_surface | dlwrf_surface | uswrf_surface | tmp_surface |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <IDate> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <int> | <dbl> | <dbl> | <dbl> |
| 2022-01-13 | 23 | 37.75 | 35.00 | 0 | 56.0 | 90.1 | NA | 98.1 | 0 | 0 | 223.900 | 0 | 262.257 |
| 2022-01-13 | 23 | 38.75 | 35.25 | 0 | 100.0 | 10.0 | NA | 100.0 | 0 | 0 | 250.200 | 0 | 271.157 |
| 2022-01-13 | 23 | 37.75 | 34.50 | 0 | 36.5 | 85.5 | NA | 98.9 | 0 | 0 | 242.400 | 0 | 270.657 |
| 2022-01-13 | 23 | 38.50 | 35.25 | 0 | 29.9 | 51.2 | NA | 94.6 | 0 | 0 | 226.500 | 0 | 266.857 |
| 2022-01-13 | 23 | 37.75 | 35.50 | 0 | 46.7 | 99.8 | NA | 99.9 | 0 | 0 | 253.200 | 0 | 267.357 |
| 2022-01-13 | 23 | 38.75 | 35.50 | 0 | 100.0 | 8.7 | NA | 100.0 | 0 | 0 | 236.000 | 0 | 270.857 |
| 2022-01-13 | 23 | 38.00 | 35.25 | 0 | 99.2 | 76.1 | NA | 100.0 | 0 | 1 | 230.200 | 0 | 259.557 |
| 2022-01-13 | 23 | 38.25 | 35.50 | 0 | 99.5 | 75.0 | NA | 99.8 | 0 | 1 | 267.300 | 0 | 268.257 |
| 2022-01-13 | 23 | 38.25 | 35.25 | 0 | 100.0 | 68.7 | NA | 100.0 | 0 | 1 | 295.200 | 0 | 271.657 |
| 2022-01-13 | 23 | 37.75 | 35.25 | 0 | 62.9 | 86.2 | NA | 90.7 | 0 | 0 | 233.200 | 0 | 262.957 |
| 2022-01-13 | 23 | 38.25 | 35.00 | 0 | 99.9 | 47.3 | NA | 100.0 | 0 | 1 | 268.200 | 0 | 269.357 |
| 2022-01-13 | 23 | 38.00 | 35.50 | 0 | 87.7 | 98.1 | NA | 99.1 | 0 | 1 | 255.500 | 0 | 265.857 |
| 2022-01-13 | 23 | 38.00 | 35.00 | 0 | 99.6 | 63.7 | NA | 100.0 | 0 | 1 | 258.400 | 0 | 263.857 |
| 2022-01-13 | 23 | 37.75 | 34.75 | 0 | 10.6 | 93.3 | NA | 97.0 | 0 | 0 | 229.800 | 0 | 265.857 |
| 2022-01-13 | 23 | 38.50 | 34.50 | 0 | 100.0 | 9.7 | NA | 100.0 | 0 | 1 | 240.100 | 0 | 267.657 |
| 2022-01-13 | 23 | 38.50 | 34.75 | 0 | 98.7 | 9.3 | NA | 100.0 | 0 | 1 | 236.900 | 0 | 266.957 |
| 2022-01-13 | 23 | 38.50 | 35.00 | 0 | 99.4 | 7.0 | NA | 99.6 | 0 | 0 | 233.100 | 0 | 267.057 |
| 2022-01-13 | 23 | 38.50 | 35.50 | 0 | 98.5 | 51.7 | NA | 100.0 | 0 | 1 | 223.600 | 0 | 259.357 |
| 2022-01-13 | 23 | 38.75 | 34.50 | 0 | 98.0 | 0.0 | NA | 98.2 | 0 | 0 | 238.700 | 0 | 270.457 |
| 2022-01-13 | 23 | 38.25 | 34.50 | 0 | 100.0 | 5.0 | NA | 100.0 | 0 | 1 | 249.100 | 0 | 266.257 |
| 2022-01-13 | 23 | 38.75 | 35.00 | 0 | 95.6 | 18.7 | NA | 96.1 | 0 | 1 | 242.400 | 0 | 270.357 |
| 2022-01-13 | 23 | 38.00 | 34.75 | 0 | 96.8 | 47.7 | NA | 100.0 | 0 | 1 | 247.400 | 0 | 265.657 |
| 2022-01-13 | 23 | 38.75 | 34.75 | 0 | 85.2 | 2.2 | NA | 86.1 | 0 | 0 | 239.300 | 0 | 270.657 |
| 2022-01-13 | 23 | 38.25 | 34.75 | 0 | 100.0 | 9.8 | NA | 100.0 | 0 | 1 | 254.900 | 0 | 268.657 |
| 2022-01-13 | 23 | 38.00 | 34.50 | 0 | 6.3 | 56.0 | NA | 100.0 | 0 | 0 | 221.700 | 0 | 265.157 |
| 2022-01-27 | 3 | 38.75 | 34.50 | 0 | NA | NA | 39.1 | 100.0 | 0 | 1 | 230.487 | 0 | 257.784 |
| 2022-01-27 | 3 | 38.25 | 35.50 | 0 | NA | NA | 100.0 | 100.0 | 0 | 1 | 248.887 | 0 | 258.784 |
| 2022-01-27 | 3 | 38.00 | 35.50 | 0 | NA | NA | 100.0 | 100.0 | 0 | 0 | 245.987 | 0 | 258.984 |
| 2022-01-27 | 3 | 37.75 | 34.75 | 0 | NA | NA | 100.0 | 100.0 | 0 | 0 | 224.287 | 0 | 257.284 |
| 2022-01-27 | 3 | 38.50 | 34.50 | 0 | NA | NA | 79.0 | 100.0 | 0 | 1 | 224.387 | 0 | 256.084 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2022-03-23 | 7 | 37.75 | 34.50 | 0.56 | 9.6 | 3.5 | 0.0 | NA | 0.800 | 0 | 205.727 | 0.192 | 268.461 |
| 2022-03-23 | 7 | 38.75 | 34.50 | 0.56 | 5.7 | 2.1 | 0.0 | NA | 0.736 | 0 | 205.127 | 0.144 | 269.361 |
| 2022-03-23 | 7 | 38.25 | 34.75 | 0.72 | 19.1 | 40.5 | 0.0 | NA | 1.216 | 0 | 193.527 | 0.528 | 258.261 |
| 2022-03-23 | 7 | 38.50 | 34.50 | 0.60 | 6.6 | 13.9 | 0.0 | NA | 1.024 | 0 | 190.827 | 0.432 | 256.561 |
| 2022-03-23 | 7 | 38.00 | 34.50 | 0.60 | 10.3 | 40.9 | 0.0 | NA | 0.896 | 0 | 198.327 | 0.304 | 261.761 |
| 2022-04-04 | 10 | 38.75 | 35.00 | 544.02 | 0.0 | 0.0 | 97.6 | 98.3 | NA | 0 | 266.397 | NA | 291.531 |
| 2022-04-04 | 10 | 37.75 | 35.50 | 579.18 | 0.0 | 0.0 | 72.6 | 80.7 | NA | 0 | 269.997 | NA | 293.231 |
| 2022-04-04 | 10 | 37.75 | 35.00 | 615.56 | 0.0 | 0.0 | 5.0 | 10.8 | NA | 0 | 238.297 | NA | 283.331 |
| 2022-04-04 | 10 | 37.75 | 35.25 | 614.90 | 0.0 | 0.0 | 20.0 | 26.5 | NA | 0 | 237.597 | NA | 286.931 |
| 2022-04-04 | 10 | 38.00 | 35.25 | 616.42 | 0.0 | 0.0 | 5.0 | 10.5 | NA | 0 | 232.197 | NA | 282.231 |
| 2022-04-04 | 10 | 38.50 | 35.50 | 612.84 | 0.0 | 0.0 | 9.1 | 12.2 | NA | 0 | 235.097 | NA | 277.831 |
| 2022-04-04 | 10 | 38.25 | 34.50 | 590.28 | 0.0 | 0.0 | 10.1 | 16.6 | NA | 0 | 251.497 | NA | 288.831 |
| 2022-04-04 | 10 | 38.25 | 35.00 | 591.38 | 0.0 | 0.0 | 33.0 | 37.2 | NA | 0 | 260.197 | NA | 292.231 |
| 2022-04-04 | 10 | 38.50 | 35.00 | 587.14 | 0.0 | 0.0 | 48.6 | 52.8 | NA | 0 | 255.197 | NA | 289.131 |
| 2022-04-04 | 10 | 38.25 | 35.25 | 588.62 | 0.0 | 0.0 | 6.0 | 9.9 | NA | 0 | 268.997 | NA | 292.131 |
| 2022-04-04 | 10 | 38.50 | 34.50 | 581.62 | 0.0 | 0.0 | 37.1 | 41.2 | NA | 0 | 257.197 | NA | 289.731 |
| 2022-04-04 | 10 | 38.75 | 35.50 | 546.76 | 0.0 | 0.0 | 85.2 | 88.5 | NA | 0 | 269.097 | NA | 291.931 |
| 2022-04-04 | 10 | 38.00 | 35.00 | 603.96 | 0.0 | 0.0 | 5.0 | 10.8 | NA | 0 | 243.897 | NA | 286.031 |
| 2022-04-04 | 10 | 38.75 | 34.75 | 555.22 | 0.0 | 0.0 | 92.4 | 93.6 | NA | 0 | 264.697 | NA | 292.331 |
| 2022-04-04 | 10 | 38.75 | 34.50 | 548.94 | 0.0 | 0.0 | 99.5 | 99.6 | NA | 0 | 265.897 | NA | 292.531 |
| 2022-04-04 | 10 | 38.25 | 35.50 | 592.34 | 0.0 | 0.0 | 5.0 | 9.2 | NA | 0 | 261.197 | NA | 291.331 |
| 2022-04-04 | 10 | 37.75 | 34.75 | 601.20 | 0.0 | 0.0 | 7.6 | 12.4 | NA | 0 | 254.597 | NA | 288.431 |
| 2022-04-04 | 10 | 38.75 | 35.25 | 531.68 | 0.0 | 0.0 | 100.0 | 100.0 | NA | 0 | 270.297 | NA | 291.531 |
| 2022-04-04 | 10 | 38.25 | 34.75 | 589.52 | 0.0 | 0.0 | 46.7 | 52.7 | NA | 0 | 258.097 | NA | 292.631 |
| 2022-04-04 | 10 | 38.50 | 34.75 | 585.10 | 0.0 | 0.0 | 52.9 | 57.4 | NA | 0 | 253.097 | NA | 289.231 |
| 2022-04-04 | 10 | 38.50 | 35.25 | 591.58 | 0.0 | 0.0 | 8.7 | 14.5 | NA | 0 | 255.897 | NA | 290.131 |
| 2022-04-04 | 10 | 38.00 | 34.75 | 596.30 | 0.0 | 0.0 | 5.0 | 11.1 | NA | 0 | 252.797 | NA | 288.931 |
| 2022-04-04 | 10 | 38.00 | 34.50 | 594.70 | 0.0 | 0.0 | 5.7 | 12.6 | NA | 0 | 251.297 | NA | 288.131 |
| 2022-04-04 | 10 | 38.00 | 35.50 | 592.68 | 0.0 | 0.0 | 70.4 | 77.8 | NA | 0 | 256.497 | NA | 290.531 |
| 2022-04-04 | 10 | 37.75 | 34.50 | 587.90 | 0.0 | 0.0 | 17.8 | 23.8 | NA | 0 | 269.797 | NA | 294.931 |
Error in `mutate()`: ℹ In argument: `across(where(is.numeric), ~na.approx(.x, na.rm = FALSE))`. Caused by error in `across()`: ! Can't compute column `hour`. Caused by error in `na.approx()`: ! "na.approx" fonksiyonu bulunamadı Traceback: 1. weather_data %>% mutate(across(where(is.numeric), ~na.approx(.x, . na.rm = FALSE))) 2. mutate(., across(where(is.numeric), ~na.approx(.x, na.rm = FALSE))) 3. mutate.data.frame(., across(where(is.numeric), ~na.approx(.x, . na.rm = FALSE))) 4. mutate_cols(.data, dplyr_quosures(...), by) 5. withCallingHandlers(for (i in seq_along(dots)) { . poke_error_context(dots, i, mask = mask) . context_poke("column", old_current_column) . new_columns <- mutate_col(dots[[i]], data, mask, new_columns) . }, error = dplyr_error_handler(dots = dots, mask = mask, bullets = mutate_bullets, . error_call = error_call, error_class = "dplyr:::mutate_error"), . warning = dplyr_warning_handler(state = warnings_state, mask = mask, . error_call = error_call)) 6. mutate_col(dots[[i]], data, mask, new_columns) 7. withCallingHandlers(mask$eval_all_mutate(quo), error = function(cnd) { . name <- dplyr_quosure_name(quo_data) . msg <- glue("Can't compute column `{name}`.") . abort(msg, call = call("across"), parent = cnd) . }) 8. mask$eval_all_mutate(quo) 9. eval() 10. .handleSimpleError(function (cnd) . { . name <- dplyr_quosure_name(quo_data) . msg <- glue("Can't compute column `{name}`.") . abort(msg, call = call("across"), parent = cnd) . }, "\"na.approx\" fonksiyonu bulunamadı", base::quote(na.approx(hour, . na.rm = FALSE))) 11. h(simpleError(msg, call)) 12. abort(msg, call = call("across"), parent = cnd) 13. signal_abort(cnd, .file) 14. signalCondition(cnd) 15. (function (cnd) . { . local_error_context(dots, i = frame[[i_sym]], mask = mask) . if (inherits(cnd, "dplyr:::internal_error")) { . parent <- error_cnd(message = bullets(cnd)) . } . else { . parent <- cnd . } . message <- c(cnd_bullet_header(action), i = if (has_active_group_context(mask)) cnd_bullet_cur_group_label()) . abort(message, class = error_class, parent = parent, call = error_call) . })(structure(list(message = structure("Can't compute column `hour`.", class = c("glue", . "character")), trace = structure(list(call = list(IRkernel::main(), . kernel$run(), handle_shell(), executor$execute(msg), tryCatch(evaluate(request$content$code, . envir = .GlobalEnv, output_handler = oh, stop_on_error = 1L), . interrupt = function(cond) { . log_debug("Interrupt during execution") . interrupted <<- TRUE . }, error = .self$handle_error), tryCatchList(expr, classes, . parentenv, handlers), tryCatchOne(tryCatchList(expr, . names[-nh], parentenv, handlers[-nh]), names[nh], parentenv, . handlers[[nh]]), doTryCatch(return(expr), name, parentenv, . handler), tryCatchList(expr, names[-nh], parentenv, handlers[-nh]), . tryCatchOne(expr, names, parentenv, handlers[[1L]]), doTryCatch(return(expr), . name, parentenv, handler), evaluate(request$content$code, . envir = .GlobalEnv, output_handler = oh, stop_on_error = 1L), . evaluate_call(expr, parsed$src[[i]], envir = envir, enclos = enclos, . debug = debug, last = i == length(out), use_try = stop_on_error != . 2L, keep_warning = keep_warning, keep_message = keep_message, . log_echo = log_echo, log_warning = log_warning, output_handler = output_handler, . include_timing = include_timing), timing_fn(handle(ev <- withCallingHandlers(withVisible(eval_with_user_handlers(expr, . envir, enclos, user_handlers)), warning = wHandler, error = eHandler, . message = mHandler))), handle(ev <- withCallingHandlers(withVisible(eval_with_user_handlers(expr, . envir, enclos, user_handlers)), warning = wHandler, error = eHandler, . message = mHandler)), try(f, silent = TRUE), tryCatch(expr, . error = function(e) { . call <- conditionCall(e) . if (!is.null(call)) { . if (identical(call[[1L]], quote(doTryCatch))) . call <- sys.call(-4L) . dcall <- deparse(call, nlines = 1L) . prefix <- paste("Error in", dcall, ": ") . LONG <- 75L . sm <- strsplit(conditionMessage(e), "\n")[[1L]] . w <- 14L + nchar(dcall, type = "w") + nchar(sm[1L], . type = "w") . if (is.na(w)) . w <- 14L + nchar(dcall, type = "b") + nchar(sm[1L], . type = "b") . if (w > LONG) . prefix <- paste0(prefix, "\n ") . } . else prefix <- "Error : " . msg <- paste0(prefix, conditionMessage(e), "\n") . .Internal(seterrmessage(msg[1L])) . if (!silent && isTRUE(getOption("show.error.messages"))) { . cat(msg, file = outFile) . .Internal(printDeferredWarnings()) . } . invisible(structure(msg, class = "try-error", condition = e)) . }), tryCatchList(expr, classes, parentenv, handlers), . tryCatchOne(expr, names, parentenv, handlers[[1L]]), doTryCatch(return(expr), . name, parentenv, handler), withCallingHandlers(withVisible(eval_with_user_handlers(expr, . envir, enclos, user_handlers)), warning = wHandler, error = eHandler, . message = mHandler), withVisible(eval_with_user_handlers(expr, . envir, enclos, user_handlers)), eval_with_user_handlers(expr, . envir, enclos, user_handlers), eval(expr, envir, enclos), . eval(expr, envir, enclos), weather_data %>% mutate(across(where(is.numeric), . ~na.approx(.x, na.rm = FALSE))), mutate(., across(where(is.numeric), . ~na.approx(.x, na.rm = FALSE))), mutate.data.frame(., . across(where(is.numeric), ~na.approx(.x, na.rm = FALSE))), . mutate_cols(.data, dplyr_quosures(...), by), withCallingHandlers(for (i in seq_along(dots)) { . poke_error_context(dots, i, mask = mask) . context_poke("column", old_current_column) . new_columns <- mutate_col(dots[[i]], data, mask, new_columns) . }, error = dplyr_error_handler(dots = dots, mask = mask, . bullets = mutate_bullets, error_call = error_call, error_class = "dplyr:::mutate_error"), . warning = dplyr_warning_handler(state = warnings_state, . mask = mask, error_call = error_call)), mutate_col(dots[[i]], . data, mask, new_columns), withCallingHandlers(mask$eval_all_mutate(quo), . error = function(cnd) { . name <- dplyr_quosure_name(quo_data) . msg <- glue("Can't compute column `{name}`.") . abort(msg, call = call("across"), parent = cnd) . }), mask$eval_all_mutate(quo), eval(), .handleSimpleError(`<fn>`, . "\"na.approx\" fonksiyonu bulunamadı", base::quote(na.approx(hour, . na.rm = FALSE))), h(simpleError(msg, call)), abort(msg, . call = call("across"), parent = cnd)), parent = c(0L, . 1L, 2L, 3L, 4L, 5L, 6L, 7L, 6L, 9L, 10L, 4L, 12L, 13L, 13L, 15L, . 16L, 17L, 18L, 19L, 13L, 13L, 13L, 23L, 24L, 0L, 0L, 0L, 28L, . 29L, 29L, 31L, 31L, 33L, 0L, 35L, 36L), visible = c(TRUE, TRUE, . TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, . TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, . TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, . FALSE, FALSE), namespace = c("IRkernel", NA, "IRkernel", NA, . "base", "base", "base", "base", "base", "base", "base", "evaluate", . "evaluate", "evaluate", "evaluate", "base", "base", "base", "base", . "base", "base", "base", "evaluate", "base", "base", NA, "dplyr", . "dplyr", "dplyr", "base", "dplyr", "base", NA, "dplyr", "base", . "dplyr", "rlang"), scope = c("::", NA, "local", NA, "::", "local", . "local", "local", "local", "local", "local", "::", ":::", "local", . "local", "::", "::", "local", "local", "local", "::", "::", ":::", . "::", "::", NA, "::", ":::", ":::", "::", ":::", "::", NA, "local", . "::", "local", "::")), row.names = c(NA, -37L), version = 2L, class = c("rlang_trace", . "rlib_trace", "tbl", "data.frame")), parent = structure(list( . message = "\"na.approx\" fonksiyonu bulunamadı", call = na.approx(hour, . na.rm = FALSE)), class = c("simpleError", "error", "condition" . )), rlang = list(inherit = TRUE), call = across(), use_cli_format = TRUE), class = c("rlang_error", . "error", "condition"))) 16. abort(message, class = error_class, parent = parent, call = error_call) 17. signal_abort(cnd, .file)
#Coordinate aggregation by long to wide format
long_weather <- weather_data
long_weather <- melt(weather_data,id.vars=c(1:4))
hourly_region_averages = dcast(long_weather, date+hour~variable,fun.aggregate=mean)
View(hourly_region_averages)
Warning message in melt.data.table(weather_data, id.vars = c(1:4)): “'measure.vars' [dswrf_surface, tcdc_low.cloud.layer, tcdc_middle.cloud.layer, tcdc_high.cloud.layer, ...] are not all of the same type. By order of hierarchy, the molten data value column will be of type 'double'. All measure variables not of type 'double' will be coerced too. Check DETAILS in ?melt.data.table for more on coercion.”
| date | hour | dswrf_surface | tcdc_low.cloud.layer | tcdc_middle.cloud.layer | tcdc_high.cloud.layer | tcdc_entire.atmosphere | uswrf_top_of_atmosphere | csnow_surface | dlwrf_surface | uswrf_surface | tmp_surface |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <IDate> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
| 2022-01-01 | 4 | 0.0000 | 2.384 | 5.944 | 4.604 | 14.296 | 0.00000 | 0.00 | 227.9990 | 0.00000 | 269.220 |
| 2022-01-01 | 5 | 0.0000 | 2.784 | 4.324 | 10.636 | 19.272 | 0.00000 | 0.00 | 227.7740 | 0.00000 | 269.104 |
| 2022-01-01 | 6 | 0.0000 | 2.964 | 5.372 | 11.688 | 21.772 | 0.00000 | 0.00 | 227.7640 | 0.00000 | 269.035 |
| 2022-01-01 | 7 | 0.0000 | 3.284 | 9.212 | 20.736 | 31.992 | 0.00000 | 0.00 | 228.1960 | 0.00000 | 269.001 |
| 2022-01-01 | 8 | 0.0000 | 3.672 | 11.252 | 26.432 | 38.376 | 0.00000 | 0.00 | 228.6570 | 0.00000 | 269.002 |
| 2022-01-01 | 9 | 7.3688 | 4.120 | 10.880 | 35.088 | 45.856 | 8.96704 | 0.00 | 229.4160 | 2.41280 | 271.634 |
| 2022-01-01 | 10 | 180.1384 | 4.180 | 13.748 | 80.764 | 85.364 | 135.20448 | 0.00 | 237.2910 | 59.18848 | 275.786 |
| 2022-01-01 | 11 | 254.7928 | 3.972 | 16.520 | 69.392 | 75.468 | 152.98944 | 0.00 | 237.3870 | 82.53120 | 278.553 |
| 2022-01-01 | 12 | 312.5520 | 4.408 | 23.468 | 60.516 | 70.368 | 167.55392 | 0.00 | 238.3870 | 99.73056 | 280.215 |
| 2022-01-01 | 13 | 347.4144 | 5.348 | 32.996 | 54.852 | 70.452 | 180.69312 | 0.00 | 240.8870 | 109.43168 | 280.609 |
| 2022-01-01 | 14 | 364.6544 | 6.772 | 39.460 | 45.880 | 71.136 | 187.03872 | 0.00 | 243.0150 | 113.51040 | 280.277 |
| 2022-01-01 | 15 | 362.2984 | 9.116 | 43.452 | 39.864 | 71.612 | 188.88640 | 0.00 | 245.2960 | 112.10304 | 279.165 |
| 2022-01-01 | 16 | 221.4584 | 37.100 | 82.296 | 12.680 | 89.116 | 168.56448 | 0.00 | 260.7560 | 66.06976 | 277.059 |
| 2022-01-01 | 17 | 157.6448 | 37.032 | 78.252 | 17.376 | 85.688 | 133.18080 | 0.00 | 258.9600 | 47.22432 | 273.467 |
| 2022-01-01 | 18 | 107.3080 | 35.876 | 74.752 | 27.016 | 83.300 | 92.25216 | 0.00 | 257.2560 | 32.11968 | 271.659 |
| 2022-01-01 | 19 | 80.4792 | 35.080 | 72.480 | 37.268 | 83.508 | 69.18720 | 0.00 | 256.5060 | 24.08960 | 271.537 |
| 2022-01-01 | 20 | 64.3864 | 36.096 | 75.236 | 44.544 | 85.644 | 55.34976 | 0.00 | 257.7050 | 19.27104 | 271.715 |
| 2022-01-01 | 21 | 53.6528 | 37.028 | 77.632 | 49.756 | 87.520 | 46.12352 | 0.00 | 259.4850 | 16.06016 | 271.855 |
| 2022-01-01 | 22 | 0.0000 | 49.732 | 90.688 | 68.948 | 98.276 | 0.00000 | 0.00 | 272.8750 | 0.00000 | 272.029 |
| 2022-01-01 | 23 | 0.0000 | 56.532 | 92.580 | 69.140 | 98.380 | 0.00000 | 0.00 | 274.7220 | 0.00000 | 272.231 |
| 2022-01-02 | 0 | 0.0000 | 66.812 | 94.140 | 76.844 | 98.760 | 0.00000 | 0.04 | 280.7740 | 0.00000 | 272.513 |
| 2022-01-02 | 1 | 0.0000 | 71.096 | 94.212 | 80.404 | 98.492 | 0.00000 | 0.20 | 283.7540 | 0.00000 | 272.616 |
| 2022-01-02 | 2 | 0.0000 | 74.580 | 94.000 | 84.192 | 98.792 | 0.00000 | 0.32 | 287.3460 | 0.00000 | 273.024 |
| 2022-01-02 | 3 | 0.0000 | 77.660 | 94.556 | 86.644 | 98.980 | 0.00000 | 0.48 | 291.0600 | 0.00000 | 273.426 |
| 2022-01-02 | 4 | 0.0000 | 93.816 | 91.588 | 99.228 | 100.000 | 0.00000 | 0.60 | 312.8040 | 0.00000 | 273.605 |
| 2022-01-02 | 5 | 0.0000 | 92.820 | 88.048 | 99.388 | 100.000 | 0.00000 | 0.60 | 311.7720 | 0.00000 | 273.537 |
| 2022-01-02 | 6 | 0.0000 | 93.304 | 89.960 | 99.588 | 100.000 | 0.00000 | 0.68 | 312.2060 | 0.00000 | 273.710 |
| 2022-01-02 | 7 | 0.0000 | 93.492 | 91.856 | 99.696 | 100.000 | 0.00000 | 0.64 | 312.9950 | 0.00000 | 273.837 |
| 2022-01-02 | 8 | 0.0000 | 93.520 | 93.408 | 99.756 | 100.000 | 0.00000 | 0.64 | 313.6920 | 0.00000 | 273.885 |
| 2022-01-02 | 9 | 1.1968 | 93.872 | 94.488 | 99.656 | 100.000 | 13.37856 | 0.64 | 314.2307 | 0.53568 | 274.263 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2024-05-19 | 16 | 741.68400 | 3.748 | 12.400 | 13.308 | 26.132 | 184.14656 | 0 | 315.8730 | 129.28704 | 300.115 |
| 2024-05-19 | 17 | 653.24720 | 4.836 | 13.920 | 15.876 | 30.608 | 177.16928 | 0 | 315.2560 | 119.13024 | 297.226 |
| 2024-05-19 | 18 | 551.01840 | 6.664 | 17.332 | 22.384 | 39.596 | 171.59360 | 0 | 315.9100 | 104.58816 | 293.967 |
| 2024-05-19 | 19 | 451.50800 | 7.392 | 17.692 | 33.540 | 48.368 | 157.55328 | 0 | 315.3600 | 87.64096 | 290.504 |
| 2024-05-19 | 20 | 365.32160 | 8.288 | 16.824 | 44.928 | 56.940 | 132.32256 | 0 | 314.6400 | 70.99328 | 287.699 |
| 2024-05-19 | 21 | 304.43328 | 7.720 | 14.724 | 52.296 | 62.416 | 110.26688 | 0 | 313.1420 | 59.15904 | 286.748 |
| 2024-05-19 | 22 | 0.00000 | 0.932 | 0.144 | 86.084 | 86.800 | 0.00000 | 0 | 302.1770 | 0.00000 | 286.008 |
| 2024-05-19 | 23 | 0.00000 | 0.544 | 0.144 | 75.572 | 76.660 | 0.00000 | 0 | 300.4080 | 0.00000 | 285.319 |
| 2024-05-20 | 0 | 0.00000 | 0.364 | 0.300 | 64.076 | 65.364 | 0.00000 | 0 | 298.4437 | 0.00000 | 284.668 |
| 2024-05-20 | 1 | 0.00000 | 0.268 | 0.300 | 52.524 | 53.820 | 0.00000 | 0 | 296.7120 | 0.00000 | 284.064 |
| 2024-05-20 | 2 | 0.00000 | 0.216 | 0.304 | 44.528 | 45.840 | 0.00000 | 0 | 295.4120 | 0.00000 | 283.597 |
| 2024-05-20 | 3 | 0.00000 | 0.180 | 0.464 | 39.212 | 40.740 | 0.00000 | 0 | 294.4750 | 0.00000 | 283.175 |
| 2024-05-20 | 4 | 0.00000 | 0.104 | 3.632 | 33.512 | 37.208 | 0.00000 | 0 | 289.2320 | 0.00000 | 282.832 |
| 2024-05-20 | 5 | 0.00000 | 0.260 | 3.596 | 43.000 | 46.432 | 0.00000 | 0 | 289.8500 | 0.00000 | 282.536 |
| 2024-05-20 | 6 | 3.35600 | 0.904 | 5.816 | 39.872 | 45.740 | 4.07168 | 0 | 290.3950 | 0.77888 | 283.660 |
| 2024-05-20 | 7 | 35.98848 | 1.968 | 7.488 | 33.316 | 41.208 | 23.18208 | 0 | 290.5790 | 9.36448 | 288.299 |
| 2024-05-20 | 8 | 88.90048 | 3.752 | 11.000 | 29.284 | 41.020 | 48.74688 | 0 | 293.7390 | 20.81152 | 292.868 |
| 2024-05-20 | 9 | 148.04160 | 5.272 | 15.512 | 26.252 | 42.420 | 77.78240 | 0 | 297.8070 | 31.73120 | 296.414 |
| 2024-05-20 | 10 | 623.06000 | 16.520 | 34.476 | 12.796 | 45.516 | 239.66880 | 0 | 318.9140 | 109.52576 | 300.288 |
| 2024-05-20 | 11 | 697.54480 | 10.808 | 32.828 | 11.096 | 43.596 | 243.46960 | 0 | 321.0400 | 118.63680 | 303.036 |
| 2024-05-20 | 12 | 757.79520 | 10.524 | 30.456 | 11.920 | 42.148 | 243.22688 | 0 | 322.7000 | 125.46112 | 304.884 |
| 2024-05-20 | 13 | 786.57280 | 12.068 | 29.524 | 13.816 | 41.716 | 251.42784 | 0 | 325.2360 | 128.43776 | 305.034 |
| 2024-05-20 | 14 | 780.91120 | 14.284 | 31.840 | 17.044 | 45.484 | 270.32896 | 0 | 329.1890 | 127.48992 | 303.220 |
| 2024-05-20 | 15 | 748.39680 | 17.028 | 35.120 | 22.488 | 52.052 | 294.02496 | 0 | 333.3480 | 123.05600 | 300.109 |
| 2024-05-20 | 16 | 428.61520 | 34.728 | 57.824 | 49.888 | 92.296 | 423.56096 | 0 | 358.2960 | 79.20320 | 297.543 |
| 2024-05-20 | 17 | 367.40000 | 34.492 | 62.112 | 44.068 | 93.800 | 396.17088 | 0 | 359.0440 | 69.55136 | 295.564 |
| 2024-05-20 | 18 | 312.24160 | 34.684 | 66.848 | 42.728 | 94.424 | 354.52992 | 0 | 358.4140 | 60.48000 | 293.613 |
| 2024-05-20 | 19 | 261.56960 | 31.080 | 66.276 | 49.852 | 93.684 | 303.41760 | 0 | 354.7380 | 51.42720 | 291.316 |
| 2024-05-20 | 20 | 212.68736 | 30.616 | 64.848 | 57.588 | 93.680 | 249.77856 | 0 | 351.5110 | 41.86048 | 288.794 |
| 2024-05-20 | 21 | 177.24096 | 29.016 | 62.472 | 63.936 | 94.160 | 208.14784 | 0 | 347.9070 | 34.88192 | 288.011 |
# Merge with hourly_region_averages
template_dt_with_weather <- merge(template_dt, hourly_region_averages, by = c('date', 'hour'), all.x = TRUE)
View(template_dt_with_weather)
#Order it by date and hour
template_dt_with_weather = template_dt_with_weather[order(date,hour)]
template_dt_with_aggregate <- template_dt_with_weather
template_dt_with_aggregate$hourly_cloud_average <- rowMeans(select(template_dt_with_aggregate, starts_with("tcdc_")), na.rm = TRUE)
template_dt_with_aggregate$hourly_max_t <- rowMaxs(as.matrix(select(template_dt_with_aggregate, starts_with("tmp_"))), na.rm = TRUE)
View(template_dt_with_aggregate)
| date | hour | production | dswrf_surface | tcdc_low.cloud.layer | tcdc_middle.cloud.layer | tcdc_high.cloud.layer | tcdc_entire.atmosphere | uswrf_top_of_atmosphere | csnow_surface | dlwrf_surface | uswrf_surface | tmp_surface |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <IDate> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
| 2022-01-01 | 4 | 0.00 | 0.0000 | 2.384 | 5.944 | 4.604 | 14.296 | 0.00000 | 0.00 | 227.9990 | 0.00000 | 269.220 |
| 2022-01-01 | 5 | 0.00 | 0.0000 | 2.784 | 4.324 | 10.636 | 19.272 | 0.00000 | 0.00 | 227.7740 | 0.00000 | 269.104 |
| 2022-01-01 | 6 | 0.00 | 0.0000 | 2.964 | 5.372 | 11.688 | 21.772 | 0.00000 | 0.00 | 227.7640 | 0.00000 | 269.035 |
| 2022-01-01 | 7 | 0.00 | 0.0000 | 3.284 | 9.212 | 20.736 | 31.992 | 0.00000 | 0.00 | 228.1960 | 0.00000 | 269.001 |
| 2022-01-01 | 8 | 3.40 | 0.0000 | 3.672 | 11.252 | 26.432 | 38.376 | 0.00000 | 0.00 | 228.6570 | 0.00000 | 269.002 |
| 2022-01-01 | 9 | 6.80 | 7.3688 | 4.120 | 10.880 | 35.088 | 45.856 | 8.96704 | 0.00 | 229.4160 | 2.41280 | 271.634 |
| 2022-01-01 | 10 | 9.38 | 180.1384 | 4.180 | 13.748 | 80.764 | 85.364 | 135.20448 | 0.00 | 237.2910 | 59.18848 | 275.786 |
| 2022-01-01 | 11 | 7.65 | 254.7928 | 3.972 | 16.520 | 69.392 | 75.468 | 152.98944 | 0.00 | 237.3870 | 82.53120 | 278.553 |
| 2022-01-01 | 12 | 6.80 | 312.5520 | 4.408 | 23.468 | 60.516 | 70.368 | 167.55392 | 0.00 | 238.3870 | 99.73056 | 280.215 |
| 2022-01-01 | 13 | 5.10 | 347.4144 | 5.348 | 32.996 | 54.852 | 70.452 | 180.69312 | 0.00 | 240.8870 | 109.43168 | 280.609 |
| 2022-01-01 | 14 | 5.10 | 364.6544 | 6.772 | 39.460 | 45.880 | 71.136 | 187.03872 | 0.00 | 243.0150 | 113.51040 | 280.277 |
| 2022-01-01 | 15 | 1.70 | 362.2984 | 9.116 | 43.452 | 39.864 | 71.612 | 188.88640 | 0.00 | 245.2960 | 112.10304 | 279.165 |
| 2022-01-01 | 16 | 0.00 | 221.4584 | 37.100 | 82.296 | 12.680 | 89.116 | 168.56448 | 0.00 | 260.7560 | 66.06976 | 277.059 |
| 2022-01-01 | 17 | 0.00 | 157.6448 | 37.032 | 78.252 | 17.376 | 85.688 | 133.18080 | 0.00 | 258.9600 | 47.22432 | 273.467 |
| 2022-01-01 | 18 | 0.00 | 107.3080 | 35.876 | 74.752 | 27.016 | 83.300 | 92.25216 | 0.00 | 257.2560 | 32.11968 | 271.659 |
| 2022-01-01 | 19 | 0.00 | 80.4792 | 35.080 | 72.480 | 37.268 | 83.508 | 69.18720 | 0.00 | 256.5060 | 24.08960 | 271.537 |
| 2022-01-01 | 20 | 0.00 | 64.3864 | 36.096 | 75.236 | 44.544 | 85.644 | 55.34976 | 0.00 | 257.7050 | 19.27104 | 271.715 |
| 2022-01-01 | 21 | 0.00 | 53.6528 | 37.028 | 77.632 | 49.756 | 87.520 | 46.12352 | 0.00 | 259.4850 | 16.06016 | 271.855 |
| 2022-01-01 | 22 | 0.00 | 0.0000 | 49.732 | 90.688 | 68.948 | 98.276 | 0.00000 | 0.00 | 272.8750 | 0.00000 | 272.029 |
| 2022-01-01 | 23 | 0.00 | 0.0000 | 56.532 | 92.580 | 69.140 | 98.380 | 0.00000 | 0.00 | 274.7220 | 0.00000 | 272.231 |
| 2022-01-02 | 0 | 0.00 | 0.0000 | 66.812 | 94.140 | 76.844 | 98.760 | 0.00000 | 0.04 | 280.7740 | 0.00000 | 272.513 |
| 2022-01-02 | 1 | 0.00 | 0.0000 | 71.096 | 94.212 | 80.404 | 98.492 | 0.00000 | 0.20 | 283.7540 | 0.00000 | 272.616 |
| 2022-01-02 | 2 | 0.00 | 0.0000 | 74.580 | 94.000 | 84.192 | 98.792 | 0.00000 | 0.32 | 287.3460 | 0.00000 | 273.024 |
| 2022-01-02 | 3 | 0.00 | 0.0000 | 77.660 | 94.556 | 86.644 | 98.980 | 0.00000 | 0.48 | 291.0600 | 0.00000 | 273.426 |
| 2022-01-02 | 4 | 0.00 | 0.0000 | 93.816 | 91.588 | 99.228 | 100.000 | 0.00000 | 0.60 | 312.8040 | 0.00000 | 273.605 |
| 2022-01-02 | 5 | 0.00 | 0.0000 | 92.820 | 88.048 | 99.388 | 100.000 | 0.00000 | 0.60 | 311.7720 | 0.00000 | 273.537 |
| 2022-01-02 | 6 | 0.00 | 0.0000 | 93.304 | 89.960 | 99.588 | 100.000 | 0.00000 | 0.68 | 312.2060 | 0.00000 | 273.710 |
| 2022-01-02 | 7 | 0.00 | 0.0000 | 93.492 | 91.856 | 99.696 | 100.000 | 0.00000 | 0.64 | 312.9950 | 0.00000 | 273.837 |
| 2022-01-02 | 8 | 0.00 | 0.0000 | 93.520 | 93.408 | 99.756 | 100.000 | 0.00000 | 0.64 | 313.6920 | 0.00000 | 273.885 |
| 2022-01-02 | 9 | 0.85 | 1.1968 | 93.872 | 94.488 | 99.656 | 100.000 | 13.37856 | 0.64 | 314.2307 | 0.53568 | 274.263 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2024-05-19 | 16 | NA | 741.68400 | 3.748 | 12.400 | 13.308 | 26.132 | 184.14656 | 0 | 315.8730 | 129.28704 | 300.115 |
| 2024-05-19 | 17 | NA | 653.24720 | 4.836 | 13.920 | 15.876 | 30.608 | 177.16928 | 0 | 315.2560 | 119.13024 | 297.226 |
| 2024-05-19 | 18 | NA | 551.01840 | 6.664 | 17.332 | 22.384 | 39.596 | 171.59360 | 0 | 315.9100 | 104.58816 | 293.967 |
| 2024-05-19 | 19 | NA | 451.50800 | 7.392 | 17.692 | 33.540 | 48.368 | 157.55328 | 0 | 315.3600 | 87.64096 | 290.504 |
| 2024-05-19 | 20 | NA | 365.32160 | 8.288 | 16.824 | 44.928 | 56.940 | 132.32256 | 0 | 314.6400 | 70.99328 | 287.699 |
| 2024-05-19 | 21 | NA | 304.43328 | 7.720 | 14.724 | 52.296 | 62.416 | 110.26688 | 0 | 313.1420 | 59.15904 | 286.748 |
| 2024-05-19 | 22 | NA | 0.00000 | 0.932 | 0.144 | 86.084 | 86.800 | 0.00000 | 0 | 302.1770 | 0.00000 | 286.008 |
| 2024-05-19 | 23 | NA | 0.00000 | 0.544 | 0.144 | 75.572 | 76.660 | 0.00000 | 0 | 300.4080 | 0.00000 | 285.319 |
| 2024-05-20 | 0 | NA | 0.00000 | 0.364 | 0.300 | 64.076 | 65.364 | 0.00000 | 0 | 298.4437 | 0.00000 | 284.668 |
| 2024-05-20 | 1 | NA | 0.00000 | 0.268 | 0.300 | 52.524 | 53.820 | 0.00000 | 0 | 296.7120 | 0.00000 | 284.064 |
| 2024-05-20 | 2 | NA | 0.00000 | 0.216 | 0.304 | 44.528 | 45.840 | 0.00000 | 0 | 295.4120 | 0.00000 | 283.597 |
| 2024-05-20 | 3 | NA | 0.00000 | 0.180 | 0.464 | 39.212 | 40.740 | 0.00000 | 0 | 294.4750 | 0.00000 | 283.175 |
| 2024-05-20 | 4 | NA | 0.00000 | 0.104 | 3.632 | 33.512 | 37.208 | 0.00000 | 0 | 289.2320 | 0.00000 | 282.832 |
| 2024-05-20 | 5 | NA | 0.00000 | 0.260 | 3.596 | 43.000 | 46.432 | 0.00000 | 0 | 289.8500 | 0.00000 | 282.536 |
| 2024-05-20 | 6 | NA | 3.35600 | 0.904 | 5.816 | 39.872 | 45.740 | 4.07168 | 0 | 290.3950 | 0.77888 | 283.660 |
| 2024-05-20 | 7 | NA | 35.98848 | 1.968 | 7.488 | 33.316 | 41.208 | 23.18208 | 0 | 290.5790 | 9.36448 | 288.299 |
| 2024-05-20 | 8 | NA | 88.90048 | 3.752 | 11.000 | 29.284 | 41.020 | 48.74688 | 0 | 293.7390 | 20.81152 | 292.868 |
| 2024-05-20 | 9 | NA | 148.04160 | 5.272 | 15.512 | 26.252 | 42.420 | 77.78240 | 0 | 297.8070 | 31.73120 | 296.414 |
| 2024-05-20 | 10 | NA | 623.06000 | 16.520 | 34.476 | 12.796 | 45.516 | 239.66880 | 0 | 318.9140 | 109.52576 | 300.288 |
| 2024-05-20 | 11 | NA | 697.54480 | 10.808 | 32.828 | 11.096 | 43.596 | 243.46960 | 0 | 321.0400 | 118.63680 | 303.036 |
| 2024-05-20 | 12 | NA | 757.79520 | 10.524 | 30.456 | 11.920 | 42.148 | 243.22688 | 0 | 322.7000 | 125.46112 | 304.884 |
| 2024-05-20 | 13 | NA | 786.57280 | 12.068 | 29.524 | 13.816 | 41.716 | 251.42784 | 0 | 325.2360 | 128.43776 | 305.034 |
| 2024-05-20 | 14 | NA | 780.91120 | 14.284 | 31.840 | 17.044 | 45.484 | 270.32896 | 0 | 329.1890 | 127.48992 | 303.220 |
| 2024-05-20 | 15 | NA | 748.39680 | 17.028 | 35.120 | 22.488 | 52.052 | 294.02496 | 0 | 333.3480 | 123.05600 | 300.109 |
| 2024-05-20 | 16 | NA | 428.61520 | 34.728 | 57.824 | 49.888 | 92.296 | 423.56096 | 0 | 358.2960 | 79.20320 | 297.543 |
| 2024-05-20 | 17 | NA | 367.40000 | 34.492 | 62.112 | 44.068 | 93.800 | 396.17088 | 0 | 359.0440 | 69.55136 | 295.564 |
| 2024-05-20 | 18 | NA | 312.24160 | 34.684 | 66.848 | 42.728 | 94.424 | 354.52992 | 0 | 358.4140 | 60.48000 | 293.613 |
| 2024-05-20 | 19 | NA | 261.56960 | 31.080 | 66.276 | 49.852 | 93.684 | 303.41760 | 0 | 354.7380 | 51.42720 | 291.316 |
| 2024-05-20 | 20 | NA | 212.68736 | 30.616 | 64.848 | 57.588 | 93.680 | 249.77856 | 0 | 351.5110 | 41.86048 | 288.794 |
| 2024-05-20 | 21 | NA | 177.24096 | 29.016 | 62.472 | 63.936 | 94.160 | 208.14784 | 0 | 347.9070 | 34.88192 | 288.011 |
| date | hour | production | dswrf_surface | tcdc_low.cloud.layer | tcdc_middle.cloud.layer | tcdc_high.cloud.layer | tcdc_entire.atmosphere | uswrf_top_of_atmosphere | csnow_surface | dlwrf_surface | uswrf_surface | tmp_surface | hourly_cloud_average | hourly_max_t |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <IDate> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
| 2022-01-01 | 4 | 0.00 | 0.0000 | 2.384 | 5.944 | 4.604 | 14.296 | 0.00000 | 0.00 | 227.9990 | 0.00000 | 269.220 | 6.807 | 269.220 |
| 2022-01-01 | 5 | 0.00 | 0.0000 | 2.784 | 4.324 | 10.636 | 19.272 | 0.00000 | 0.00 | 227.7740 | 0.00000 | 269.104 | 9.254 | 269.104 |
| 2022-01-01 | 6 | 0.00 | 0.0000 | 2.964 | 5.372 | 11.688 | 21.772 | 0.00000 | 0.00 | 227.7640 | 0.00000 | 269.035 | 10.449 | 269.035 |
| 2022-01-01 | 7 | 0.00 | 0.0000 | 3.284 | 9.212 | 20.736 | 31.992 | 0.00000 | 0.00 | 228.1960 | 0.00000 | 269.001 | 16.306 | 269.001 |
| 2022-01-01 | 8 | 3.40 | 0.0000 | 3.672 | 11.252 | 26.432 | 38.376 | 0.00000 | 0.00 | 228.6570 | 0.00000 | 269.002 | 19.933 | 269.002 |
| 2022-01-01 | 9 | 6.80 | 7.3688 | 4.120 | 10.880 | 35.088 | 45.856 | 8.96704 | 0.00 | 229.4160 | 2.41280 | 271.634 | 23.986 | 271.634 |
| 2022-01-01 | 10 | 9.38 | 180.1384 | 4.180 | 13.748 | 80.764 | 85.364 | 135.20448 | 0.00 | 237.2910 | 59.18848 | 275.786 | 46.014 | 275.786 |
| 2022-01-01 | 11 | 7.65 | 254.7928 | 3.972 | 16.520 | 69.392 | 75.468 | 152.98944 | 0.00 | 237.3870 | 82.53120 | 278.553 | 41.338 | 278.553 |
| 2022-01-01 | 12 | 6.80 | 312.5520 | 4.408 | 23.468 | 60.516 | 70.368 | 167.55392 | 0.00 | 238.3870 | 99.73056 | 280.215 | 39.690 | 280.215 |
| 2022-01-01 | 13 | 5.10 | 347.4144 | 5.348 | 32.996 | 54.852 | 70.452 | 180.69312 | 0.00 | 240.8870 | 109.43168 | 280.609 | 40.912 | 280.609 |
| 2022-01-01 | 14 | 5.10 | 364.6544 | 6.772 | 39.460 | 45.880 | 71.136 | 187.03872 | 0.00 | 243.0150 | 113.51040 | 280.277 | 40.812 | 280.277 |
| 2022-01-01 | 15 | 1.70 | 362.2984 | 9.116 | 43.452 | 39.864 | 71.612 | 188.88640 | 0.00 | 245.2960 | 112.10304 | 279.165 | 41.011 | 279.165 |
| 2022-01-01 | 16 | 0.00 | 221.4584 | 37.100 | 82.296 | 12.680 | 89.116 | 168.56448 | 0.00 | 260.7560 | 66.06976 | 277.059 | 55.298 | 277.059 |
| 2022-01-01 | 17 | 0.00 | 157.6448 | 37.032 | 78.252 | 17.376 | 85.688 | 133.18080 | 0.00 | 258.9600 | 47.22432 | 273.467 | 54.587 | 273.467 |
| 2022-01-01 | 18 | 0.00 | 107.3080 | 35.876 | 74.752 | 27.016 | 83.300 | 92.25216 | 0.00 | 257.2560 | 32.11968 | 271.659 | 55.236 | 271.659 |
| 2022-01-01 | 19 | 0.00 | 80.4792 | 35.080 | 72.480 | 37.268 | 83.508 | 69.18720 | 0.00 | 256.5060 | 24.08960 | 271.537 | 57.084 | 271.537 |
| 2022-01-01 | 20 | 0.00 | 64.3864 | 36.096 | 75.236 | 44.544 | 85.644 | 55.34976 | 0.00 | 257.7050 | 19.27104 | 271.715 | 60.380 | 271.715 |
| 2022-01-01 | 21 | 0.00 | 53.6528 | 37.028 | 77.632 | 49.756 | 87.520 | 46.12352 | 0.00 | 259.4850 | 16.06016 | 271.855 | 62.984 | 271.855 |
| 2022-01-01 | 22 | 0.00 | 0.0000 | 49.732 | 90.688 | 68.948 | 98.276 | 0.00000 | 0.00 | 272.8750 | 0.00000 | 272.029 | 76.911 | 272.029 |
| 2022-01-01 | 23 | 0.00 | 0.0000 | 56.532 | 92.580 | 69.140 | 98.380 | 0.00000 | 0.00 | 274.7220 | 0.00000 | 272.231 | 79.158 | 272.231 |
| 2022-01-02 | 0 | 0.00 | 0.0000 | 66.812 | 94.140 | 76.844 | 98.760 | 0.00000 | 0.04 | 280.7740 | 0.00000 | 272.513 | 84.139 | 272.513 |
| 2022-01-02 | 1 | 0.00 | 0.0000 | 71.096 | 94.212 | 80.404 | 98.492 | 0.00000 | 0.20 | 283.7540 | 0.00000 | 272.616 | 86.051 | 272.616 |
| 2022-01-02 | 2 | 0.00 | 0.0000 | 74.580 | 94.000 | 84.192 | 98.792 | 0.00000 | 0.32 | 287.3460 | 0.00000 | 273.024 | 87.891 | 273.024 |
| 2022-01-02 | 3 | 0.00 | 0.0000 | 77.660 | 94.556 | 86.644 | 98.980 | 0.00000 | 0.48 | 291.0600 | 0.00000 | 273.426 | 89.460 | 273.426 |
| 2022-01-02 | 4 | 0.00 | 0.0000 | 93.816 | 91.588 | 99.228 | 100.000 | 0.00000 | 0.60 | 312.8040 | 0.00000 | 273.605 | 96.158 | 273.605 |
| 2022-01-02 | 5 | 0.00 | 0.0000 | 92.820 | 88.048 | 99.388 | 100.000 | 0.00000 | 0.60 | 311.7720 | 0.00000 | 273.537 | 95.064 | 273.537 |
| 2022-01-02 | 6 | 0.00 | 0.0000 | 93.304 | 89.960 | 99.588 | 100.000 | 0.00000 | 0.68 | 312.2060 | 0.00000 | 273.710 | 95.713 | 273.710 |
| 2022-01-02 | 7 | 0.00 | 0.0000 | 93.492 | 91.856 | 99.696 | 100.000 | 0.00000 | 0.64 | 312.9950 | 0.00000 | 273.837 | 96.261 | 273.837 |
| 2022-01-02 | 8 | 0.00 | 0.0000 | 93.520 | 93.408 | 99.756 | 100.000 | 0.00000 | 0.64 | 313.6920 | 0.00000 | 273.885 | 96.671 | 273.885 |
| 2022-01-02 | 9 | 0.85 | 1.1968 | 93.872 | 94.488 | 99.656 | 100.000 | 13.37856 | 0.64 | 314.2307 | 0.53568 | 274.263 | 97.004 | 274.263 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2024-05-19 | 16 | NA | 741.68400 | 3.748 | 12.400 | 13.308 | 26.132 | 184.14656 | 0 | 315.8730 | 129.28704 | 300.115 | 13.897 | 300.115 |
| 2024-05-19 | 17 | NA | 653.24720 | 4.836 | 13.920 | 15.876 | 30.608 | 177.16928 | 0 | 315.2560 | 119.13024 | 297.226 | 16.310 | 297.226 |
| 2024-05-19 | 18 | NA | 551.01840 | 6.664 | 17.332 | 22.384 | 39.596 | 171.59360 | 0 | 315.9100 | 104.58816 | 293.967 | 21.494 | 293.967 |
| 2024-05-19 | 19 | NA | 451.50800 | 7.392 | 17.692 | 33.540 | 48.368 | 157.55328 | 0 | 315.3600 | 87.64096 | 290.504 | 26.748 | 290.504 |
| 2024-05-19 | 20 | NA | 365.32160 | 8.288 | 16.824 | 44.928 | 56.940 | 132.32256 | 0 | 314.6400 | 70.99328 | 287.699 | 31.745 | 287.699 |
| 2024-05-19 | 21 | NA | 304.43328 | 7.720 | 14.724 | 52.296 | 62.416 | 110.26688 | 0 | 313.1420 | 59.15904 | 286.748 | 34.289 | 286.748 |
| 2024-05-19 | 22 | NA | 0.00000 | 0.932 | 0.144 | 86.084 | 86.800 | 0.00000 | 0 | 302.1770 | 0.00000 | 286.008 | 43.490 | 286.008 |
| 2024-05-19 | 23 | NA | 0.00000 | 0.544 | 0.144 | 75.572 | 76.660 | 0.00000 | 0 | 300.4080 | 0.00000 | 285.319 | 38.230 | 285.319 |
| 2024-05-20 | 0 | NA | 0.00000 | 0.364 | 0.300 | 64.076 | 65.364 | 0.00000 | 0 | 298.4437 | 0.00000 | 284.668 | 32.526 | 284.668 |
| 2024-05-20 | 1 | NA | 0.00000 | 0.268 | 0.300 | 52.524 | 53.820 | 0.00000 | 0 | 296.7120 | 0.00000 | 284.064 | 26.728 | 284.064 |
| 2024-05-20 | 2 | NA | 0.00000 | 0.216 | 0.304 | 44.528 | 45.840 | 0.00000 | 0 | 295.4120 | 0.00000 | 283.597 | 22.722 | 283.597 |
| 2024-05-20 | 3 | NA | 0.00000 | 0.180 | 0.464 | 39.212 | 40.740 | 0.00000 | 0 | 294.4750 | 0.00000 | 283.175 | 20.149 | 283.175 |
| 2024-05-20 | 4 | NA | 0.00000 | 0.104 | 3.632 | 33.512 | 37.208 | 0.00000 | 0 | 289.2320 | 0.00000 | 282.832 | 18.614 | 282.832 |
| 2024-05-20 | 5 | NA | 0.00000 | 0.260 | 3.596 | 43.000 | 46.432 | 0.00000 | 0 | 289.8500 | 0.00000 | 282.536 | 23.322 | 282.536 |
| 2024-05-20 | 6 | NA | 3.35600 | 0.904 | 5.816 | 39.872 | 45.740 | 4.07168 | 0 | 290.3950 | 0.77888 | 283.660 | 23.083 | 283.660 |
| 2024-05-20 | 7 | NA | 35.98848 | 1.968 | 7.488 | 33.316 | 41.208 | 23.18208 | 0 | 290.5790 | 9.36448 | 288.299 | 20.995 | 288.299 |
| 2024-05-20 | 8 | NA | 88.90048 | 3.752 | 11.000 | 29.284 | 41.020 | 48.74688 | 0 | 293.7390 | 20.81152 | 292.868 | 21.264 | 292.868 |
| 2024-05-20 | 9 | NA | 148.04160 | 5.272 | 15.512 | 26.252 | 42.420 | 77.78240 | 0 | 297.8070 | 31.73120 | 296.414 | 22.364 | 296.414 |
| 2024-05-20 | 10 | NA | 623.06000 | 16.520 | 34.476 | 12.796 | 45.516 | 239.66880 | 0 | 318.9140 | 109.52576 | 300.288 | 27.327 | 300.288 |
| 2024-05-20 | 11 | NA | 697.54480 | 10.808 | 32.828 | 11.096 | 43.596 | 243.46960 | 0 | 321.0400 | 118.63680 | 303.036 | 24.582 | 303.036 |
| 2024-05-20 | 12 | NA | 757.79520 | 10.524 | 30.456 | 11.920 | 42.148 | 243.22688 | 0 | 322.7000 | 125.46112 | 304.884 | 23.762 | 304.884 |
| 2024-05-20 | 13 | NA | 786.57280 | 12.068 | 29.524 | 13.816 | 41.716 | 251.42784 | 0 | 325.2360 | 128.43776 | 305.034 | 24.281 | 305.034 |
| 2024-05-20 | 14 | NA | 780.91120 | 14.284 | 31.840 | 17.044 | 45.484 | 270.32896 | 0 | 329.1890 | 127.48992 | 303.220 | 27.163 | 303.220 |
| 2024-05-20 | 15 | NA | 748.39680 | 17.028 | 35.120 | 22.488 | 52.052 | 294.02496 | 0 | 333.3480 | 123.05600 | 300.109 | 31.672 | 300.109 |
| 2024-05-20 | 16 | NA | 428.61520 | 34.728 | 57.824 | 49.888 | 92.296 | 423.56096 | 0 | 358.2960 | 79.20320 | 297.543 | 58.684 | 297.543 |
| 2024-05-20 | 17 | NA | 367.40000 | 34.492 | 62.112 | 44.068 | 93.800 | 396.17088 | 0 | 359.0440 | 69.55136 | 295.564 | 58.618 | 295.564 |
| 2024-05-20 | 18 | NA | 312.24160 | 34.684 | 66.848 | 42.728 | 94.424 | 354.52992 | 0 | 358.4140 | 60.48000 | 293.613 | 59.671 | 293.613 |
| 2024-05-20 | 19 | NA | 261.56960 | 31.080 | 66.276 | 49.852 | 93.684 | 303.41760 | 0 | 354.7380 | 51.42720 | 291.316 | 60.223 | 291.316 |
| 2024-05-20 | 20 | NA | 212.68736 | 30.616 | 64.848 | 57.588 | 93.680 | 249.77856 | 0 | 351.5110 | 41.86048 | 288.794 | 61.683 | 288.794 |
| 2024-05-20 | 21 | NA | 177.24096 | 29.016 | 62.472 | 63.936 | 94.160 | 208.14784 | 0 | 347.9070 | 34.88192 | 288.011 | 62.396 | 288.011 |
# Use select to exclude coumns starting with "tcdc_" to focus on average
template_dt_with_aggregate <- template_dt_with_aggregate %>%
select(-starts_with("tcdc_"))
#-starts_with("tmp_"))
# Read your holiday dataset
#holiday_data <- read.csv("/Users/ecemozturk/Desktop/Short Holiday.csv")
holiday_data <- read_excel("/Users/ecemozturk/Desktop/Short Holiday12.xlsx")
# Convert date column to Date format
holiday_data$date <- as.Date(holiday_data$date, format = "%d.%m.%Y", na.rm = TRUE)
# Merge holiday data with production dataset based on the date column
all_data <- merge(template_dt_with_aggregate, holiday_data, by = "date", all.x = TRUE)
all_data = all_data[!is.na(production)]
all_data_daily <- all_data[all_data$date == day_before_tday, ]
available_data = all_data[!is.na(production) & hour >= 4 & hour <= 19,]
#to_be_forecasted = template_dt_with_weather[is.na(production)]
to_be_forecasted <- all_data[is.na(production) & hour >= 4 & hour <= 19, ]
View(all_data)
| date | hour | production | dswrf_surface | uswrf_top_of_atmosphere | csnow_surface | dlwrf_surface | uswrf_surface | tmp_surface | hourly_cloud_average | hourly_max_t | is.weekend | is.ramadan | is.religousday | is.nationalday | is.publicholiday |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <IDate> | <int> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
| 2022-01-01 | 4 | 0.00 | 0.0000 | 0.00000 | 0.00 | 227.9990 | 0.00000 | 269.220 | 6.807 | 269.220 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 5 | 0.00 | 0.0000 | 0.00000 | 0.00 | 227.7740 | 0.00000 | 269.104 | 9.254 | 269.104 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 6 | 0.00 | 0.0000 | 0.00000 | 0.00 | 227.7640 | 0.00000 | 269.035 | 10.449 | 269.035 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 7 | 0.00 | 0.0000 | 0.00000 | 0.00 | 228.1960 | 0.00000 | 269.001 | 16.306 | 269.001 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 8 | 3.40 | 0.0000 | 0.00000 | 0.00 | 228.6570 | 0.00000 | 269.002 | 19.933 | 269.002 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 9 | 6.80 | 7.3688 | 8.96704 | 0.00 | 229.4160 | 2.41280 | 271.634 | 23.986 | 271.634 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 10 | 9.38 | 180.1384 | 135.20448 | 0.00 | 237.2910 | 59.18848 | 275.786 | 46.014 | 275.786 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 11 | 7.65 | 254.7928 | 152.98944 | 0.00 | 237.3870 | 82.53120 | 278.553 | 41.338 | 278.553 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 12 | 6.80 | 312.5520 | 167.55392 | 0.00 | 238.3870 | 99.73056 | 280.215 | 39.690 | 280.215 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 13 | 5.10 | 347.4144 | 180.69312 | 0.00 | 240.8870 | 109.43168 | 280.609 | 40.912 | 280.609 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 14 | 5.10 | 364.6544 | 187.03872 | 0.00 | 243.0150 | 113.51040 | 280.277 | 40.812 | 280.277 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 15 | 1.70 | 362.2984 | 188.88640 | 0.00 | 245.2960 | 112.10304 | 279.165 | 41.011 | 279.165 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 16 | 0.00 | 221.4584 | 168.56448 | 0.00 | 260.7560 | 66.06976 | 277.059 | 55.298 | 277.059 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 17 | 0.00 | 157.6448 | 133.18080 | 0.00 | 258.9600 | 47.22432 | 273.467 | 54.587 | 273.467 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 18 | 0.00 | 107.3080 | 92.25216 | 0.00 | 257.2560 | 32.11968 | 271.659 | 55.236 | 271.659 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 19 | 0.00 | 80.4792 | 69.18720 | 0.00 | 256.5060 | 24.08960 | 271.537 | 57.084 | 271.537 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 20 | 0.00 | 64.3864 | 55.34976 | 0.00 | 257.7050 | 19.27104 | 271.715 | 60.380 | 271.715 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 21 | 0.00 | 53.6528 | 46.12352 | 0.00 | 259.4850 | 16.06016 | 271.855 | 62.984 | 271.855 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 22 | 0.00 | 0.0000 | 0.00000 | 0.00 | 272.8750 | 0.00000 | 272.029 | 76.911 | 272.029 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-01 | 23 | 0.00 | 0.0000 | 0.00000 | 0.00 | 274.7220 | 0.00000 | 272.231 | 79.158 | 272.231 | 1 | 0 | 0 | 1 | 1 |
| 2022-01-02 | 0 | 0.00 | 0.0000 | 0.00000 | 0.04 | 280.7740 | 0.00000 | 272.513 | 84.139 | 272.513 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 1 | 0.00 | 0.0000 | 0.00000 | 0.20 | 283.7540 | 0.00000 | 272.616 | 86.051 | 272.616 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 2 | 0.00 | 0.0000 | 0.00000 | 0.32 | 287.3460 | 0.00000 | 273.024 | 87.891 | 273.024 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 3 | 0.00 | 0.0000 | 0.00000 | 0.48 | 291.0600 | 0.00000 | 273.426 | 89.460 | 273.426 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 4 | 0.00 | 0.0000 | 0.00000 | 0.60 | 312.8040 | 0.00000 | 273.605 | 96.158 | 273.605 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 5 | 0.00 | 0.0000 | 0.00000 | 0.60 | 311.7720 | 0.00000 | 273.537 | 95.064 | 273.537 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 6 | 0.00 | 0.0000 | 0.00000 | 0.68 | 312.2060 | 0.00000 | 273.710 | 95.713 | 273.710 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 7 | 0.00 | 0.0000 | 0.00000 | 0.64 | 312.9950 | 0.00000 | 273.837 | 96.261 | 273.837 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 8 | 0.00 | 0.0000 | 0.00000 | 0.64 | 313.6920 | 0.00000 | 273.885 | 96.671 | 273.885 | 1 | 0 | 0 | 0 | 1 |
| 2022-01-02 | 9 | 0.85 | 1.1968 | 13.37856 | 0.64 | 314.2307 | 0.53568 | 274.263 | 97.004 | 274.263 | 1 | 0 | 0 | 0 | 1 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 2024-05-14 | 18 | 0.14 | 568.46960 | 169.69920 | 0 | 264.016 | 110.46400 | 286.9810 | 10.242 | 286.9810 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-14 | 19 | 0.00 | 469.56800 | 150.26752 | 0 | 261.975 | 94.29376 | 283.1640 | 9.776 | 283.1640 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-14 | 20 | 0.00 | 379.66336 | 124.10624 | 0 | 259.407 | 76.52352 | 279.9370 | 8.808 | 279.9370 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-14 | 21 | 0.00 | 316.38528 | 103.42080 | 0 | 257.020 | 63.77152 | 279.0600 | 8.659 | 279.0600 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-14 | 22 | 0.00 | 0.00000 | 0.00000 | 0 | 243.652 | 0.00000 | 278.3870 | 14.836 | 278.3870 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-14 | 23 | 0.00 | 0.00000 | 0.00000 | 0 | 242.829 | 0.00000 | 277.7550 | 15.197 | 277.7550 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 0 | 0.00 | 0.00000 | 0.00000 | 0 | 241.878 | 0.00000 | 277.1440 | 14.826 | 277.1440 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 1 | 0.00 | 0.00000 | 0.00000 | 0 | 241.984 | 0.00000 | 276.8100 | 18.349 | 276.8100 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 2 | 0.00 | 0.00000 | 0.00000 | 0 | 243.318 | 0.00000 | 276.6900 | 24.441 | 276.6900 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 3 | 0.00 | 0.00000 | 0.00000 | 0 | 246.041 | 0.00000 | 276.9520 | 30.521 | 276.9520 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 4 | 0.08 | 0.00000 | 0.00000 | 0 | 265.722 | 0.00000 | 276.9600 | 62.846 | 276.9600 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 5 | 0.84 | 0.00000 | 0.00000 | 0 | 268.502 | 0.00000 | 276.9870 | 64.873 | 276.9870 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 6 | 2.94 | 2.19840 | 4.11968 | 0 | 270.013 | 0.44928 | 277.9850 | 66.249 | 277.9850 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 7 | 6.58 | 23.85280 | 32.98688 | 0 | 271.004 | 4.95872 | 281.3370 | 66.061 | 281.3370 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 8 | 9.13 | 70.55808 | 64.85632 | 0 | 271.370 | 14.99968 | 285.4930 | 65.294 | 285.4930 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 9 | 9.82 | 133.17440 | 93.23584 | 0 | 272.017 | 27.34272 | 290.0600 | 64.018 | 290.0600 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 10 | 9.87 | 671.21840 | 220.83264 | 0 | 271.943 | 122.53632 | 295.1730 | 43.818 | 295.1730 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 11 | 9.82 | 770.01600 | 205.89824 | 0 | 270.559 | 133.23072 | 299.4520 | 33.187 | 299.4520 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 12 | 7.92 | 837.92160 | 199.86368 | 0 | 271.397 | 139.62432 | 301.1160 | 24.561 | 301.1160 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 13 | 7.18 | 877.82480 | 199.39392 | 0 | 273.757 | 143.27296 | 301.0770 | 21.357 | 301.0770 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 14 | 6.00 | 891.83360 | 201.66912 | 0 | 276.612 | 144.44480 | 300.1656 | 20.904 | 300.1656 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 15 | 3.11 | 880.27072 | 207.31840 | 0 | 280.244 | 143.31904 | 298.0160 | 23.678 | 298.0160 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 16 | 0.00 | 645.33440 | 259.09504 | 0 | 305.592 | 117.19168 | 295.4920 | 53.864 | 295.4920 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 17 | 0.00 | 563.07520 | 245.66080 | 0 | 305.315 | 105.66080 | 292.5800 | 54.906 | 292.5800 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 18 | 0.00 | 479.35600 | 223.52064 | 0 | 304.388 | 92.77632 | 289.5560 | 52.225 | 289.5560 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 19 | 0.00 | 394.93824 | 195.49504 | 0 | 302.328 | 77.94688 | 286.2330 | 50.264 | 286.2330 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 20 | 0.00 | 319.39200 | 161.11552 | 0 | 301.056 | 63.09376 | 283.7850 | 49.482 | 283.7850 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 21 | 0.00 | 266.16064 | 134.26176 | 0 | 300.140 | 52.57856 | 283.2000 | 48.427 | 283.2000 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 22 | 0.00 | 0.00000 | 0.00000 | 0 | 290.460 | 0.00000 | 282.4660 | 38.033 | 282.4660 | 0 | 0 | 0 | 0 | 0 |
| 2024-05-15 | 23 | 0.00 | 0.00000 | 0.00000 | 0 | 286.686 | 0.00000 | 281.7430 | 34.667 | 281.7430 | 0 | 0 | 0 | 0 | 0 |
# Plot production data as a line graph
plot(all_data$date, all_data$production, type = "l", xlab = "Date", ylab = "Production", main = "Production Data")
Linear regression model:
To make a prediction by using linear regression, our first attempt was to try to create an aggregate linear regression model by using the parameters defined above. In addition to these parameters, once the aggregate production plot was analyzed, one can clearly detect a shift in the production on the period approximately in between 01-06-2022 and 16-11-2022. To eliminate its effect in our model, we have defined special_period parameter as indicated below. Lastly, we can see a clear pattern where the production reaches up to capacity level of approximately 10 and does not go beyond that level.
Since each hour has different production levels and its parameters take different values, we have created hourly-linear regressions to make forecasts for each hour seperately and eliminate hourly seasonality. We have prepared linear regression models for hours in between 4 & 18 since there is usually no production at hours 0,1,2,3 and 19,20,21,22,23. We have started our analysis by preparing a lm model including all the parameters introduced. After checking the summary and ACF plots of each hour, we have decided to include the ones with large significance and discard the insignificant ones from our model. For each hour, the same assumption was followed.
After doing that, we have introduced another variable to capture the effect of lags (indicated as lag_1_production). In most cases, lag time was selected as 1.
# Convert wday and month to characters
all_data[, wday := as.character(wday(date, label = TRUE))]
all_data[, month := as.character(month(date, label = TRUE))]
#special period was defined to represent the period when the capacity reaches up to 12.
all_data$special_period <- as.numeric(all_data$date >= as.Date("2022-06-01") & all_data$date <= as.Date("2022-11-16"))
#Linear regression for cummulative production
lm_model <- lm(production ~.+hourly_max_t*month+wday -date, data = all_data)
summary(lm_model)
Call:
lm(formula = production ~ . + hourly_max_t * month + wday - date,
data = all_data)
Residuals:
Min 1Q Median 3Q Max
-9.2566 -1.3381 -0.2832 1.1297 10.1386
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.183e+02 1.806e+00 -65.491 < 2e-16 ***
hour -7.434e-02 2.426e-03 -30.643 < 2e-16 ***
dswrf_surface -8.027e-03 2.250e-04 -35.682 < 2e-16 ***
uswrf_top_of_atmosphere 6.639e-03 2.504e-04 26.511 < 2e-16 ***
csnow_surface 2.107e+00 1.078e-01 19.540 < 2e-16 ***
dlwrf_surface -8.416e-02 9.777e-04 -86.087 < 2e-16 ***
uswrf_surface 1.251e-02 6.585e-04 19.005 < 2e-16 ***
tmp_surface 5.074e-01 6.415e-03 79.098 < 2e-16 ***
hourly_cloud_average 2.601e-02 9.883e-04 26.316 < 2e-16 ***
hourly_max_t NA NA NA NA
is.weekend 4.192e-01 2.437e-01 1.720 0.085457 .
is.ramadan -3.223e-01 6.984e-02 -4.615 3.96e-06 ***
is.religousday 1.978e-01 9.355e-02 2.114 0.034518 *
is.nationalday 2.637e-01 1.608e-01 1.640 0.101032
is.publicholiday -6.239e-01 2.425e-01 -2.573 0.010102 *
wdayCum -1.783e-01 5.583e-02 -3.193 0.001409 **
wdayÇar -1.642e-01 5.564e-02 -2.950 0.003180 **
wdayPaz -1.141e-01 5.577e-02 -2.046 0.040762 *
wdayPer -2.192e-01 5.578e-02 -3.929 8.54e-05 ***
wdayPts -1.291e-01 5.565e-02 -2.320 0.020357 *
wdaySal NA NA NA NA
monthAra -7.767e+00 3.301e+00 -2.353 0.018632 *
monthEki 1.333e-01 2.530e+00 0.053 0.957989
monthEyl -4.919e-01 2.140e+00 -0.230 0.818168
monthHaz -3.846e+01 2.665e+00 -14.432 < 2e-16 ***
monthKas 4.319e+00 2.868e+00 1.506 0.132159
monthMar 9.223e+00 2.223e+00 4.149 3.36e-05 ***
monthMay -2.338e+01 2.367e+00 -9.876 < 2e-16 ***
monthNis -8.559e+00 2.200e+00 -3.891 0.000100 ***
monthOca 7.182e+00 2.500e+00 2.873 0.004070 **
monthŞub 2.010e+01 2.354e+00 8.537 < 2e-16 ***
monthTem -1.940e+01 2.222e+00 -8.731 < 2e-16 ***
special_period 2.101e-01 4.691e-02 4.479 7.54e-06 ***
hourly_max_t:monthAra 3.382e-02 1.174e-02 2.881 0.003962 **
hourly_max_t:monthEki 5.904e-04 8.679e-03 0.068 0.945771
hourly_max_t:monthEyl 1.397e-03 7.209e-03 0.194 0.846370
hourly_max_t:monthHaz 1.356e-01 9.053e-03 14.979 < 2e-16 ***
hourly_max_t:monthKas -1.199e-02 1.002e-02 -1.197 0.231334
hourly_max_t:monthMar -2.815e-02 7.718e-03 -3.647 0.000266 ***
hourly_max_t:monthMay 8.451e-02 8.114e-03 10.415 < 2e-16 ***
hourly_max_t:monthNis 3.230e-02 7.521e-03 4.294 1.76e-05 ***
hourly_max_t:monthOca -2.041e-02 8.828e-03 -2.312 0.020798 *
hourly_max_t:monthŞub -6.574e-02 8.272e-03 -7.948 1.99e-15 ***
hourly_max_t:monthTem 6.668e-02 7.466e-03 8.931 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.144 on 20735 degrees of freedom
(3 observations deleted due to missingness)
Multiple R-squared: 0.664, Adjusted R-squared: 0.6633
F-statistic: 999.3 on 41 and 20735 DF, p-value: < 2.2e-16
available_data <- all_data[!is.na(production) & hour >= 4 & hour <= 18 & date < start_date,]
to_be_forecasted <- all_data[!is.na(production) & hour >= 4 & hour <= 18 & date >= start_date & date <= end_date, ]
library(dplyr)
Once we plot hour_4_data, we observe limited amount of data mainly because the sun usually does not rise at that time. Therefore, our regression value couldn't captured the trend and our predictions were significantly deviated from the actual values , as expected.
# Filter data for hour 4
hour_4_data <- all_data[all_data$hour == 4, ]
hour_4_data$trend_hour_4 <- 1:nrow(hour_4_data)
hour_4_data[, lag_1_production := shift(production,1)]
hour_4_data[,lag_1_diff:=production-lag_1_production]
hour_4_data <- hour_4_data[!is.na(lag_1_production)]
# Fit linear regression model for hour 4
lm_hour_4 <- lm(production ~+lag_1_production +dlwrf_surface+tmp_surface+hourly_cloud_average+special_period+trend_hour_4+month, data = hour_4_data)
# Summarize the model
summary(lm_hour_4)
checkresiduals(lm_hour_4)
Call:
lm(formula = production ~ +lag_1_production + dlwrf_surface +
tmp_surface + hourly_cloud_average + special_period + trend_hour_4 +
month, data = hour_4_data)
Residuals:
Min 1Q Median 3Q Max
-0.121351 -0.001939 -0.000402 0.000778 0.173824
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.239e-02 5.513e-02 0.950 0.34219
lag_1_production 7.021e-01 2.467e-02 28.459 < 2e-16 ***
dlwrf_surface 1.859e-05 3.902e-05 0.476 0.63393
tmp_surface -1.980e-04 2.282e-04 -0.868 0.38588
hourly_cloud_average -1.778e-05 2.921e-05 -0.609 0.54286
special_period -5.201e-03 1.516e-03 -3.431 0.00063 ***
trend_hour_4 3.035e-06 2.114e-06 1.436 0.15137
monthAra -4.042e-03 3.027e-03 -1.335 0.18217
monthEki -5.109e-04 2.435e-03 -0.210 0.83385
monthEyl -2.670e-04 2.246e-03 -0.119 0.90539
monthHaz 1.294e-02 2.430e-03 5.325 1.29e-07 ***
monthKas -2.199e-03 2.750e-03 -0.800 0.42413
monthMar -3.781e-03 3.050e-03 -1.239 0.21550
monthMay -7.163e-04 2.485e-03 -0.288 0.77320
monthNis -3.008e-03 2.612e-03 -1.152 0.24980
monthOca -3.981e-03 3.234e-03 -1.231 0.21862
monthŞub -4.200e-03 3.328e-03 -1.262 0.20728
monthTem 5.044e-03 2.262e-03 2.230 0.02601 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0119 on 847 degrees of freedom
Multiple R-squared: 0.6806, Adjusted R-squared: 0.6742
F-statistic: 106.2 on 17 and 847 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 21 data: Residuals LM test = 415.11, df = 21, p-value < 2.2e-16
#Production data for hour 4
hour_4_data <- all_data[all_data$hour == 4, ]
# Plot production for hour 4
ggplot(hour_4_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 4",
x = "Date",
y = "Production") +
theme_minimal()
hour_4_data <- all_data %>%
filter(hour == 4, date >= start_date & date <= end_date)
forecast_data_hour_4 <- to_be_forecasted %>%
filter(hour == 4)
forecast_data_hour_4$trend_hour_4 <- nrow(hour_4_data) + 1:nrow(forecast_data_hour_4)
last_production_value <- tail(hour_4_data$production, 1)
forecast_data_hour_4 <- forecast_data_hour_4 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_4 <- predict(lm_hour_4, newdata = forecast_data_hour_4)
predictions_hour_4[predictions_hour_4 < 0] <- 0
predictions_hour_4_df <- data.frame(
date = forecast_data_hour_4$date,
predicted = predictions_hour_4
)
merged_data <- merge(hour_4_data, predictions_hour_4_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_4_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 1407.11310958475 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.00 0.05524756 0.055247562
2: 2024-02-02 0.00 0.05539115 0.055391154
3: 2024-02-03 0.00 0.05517195 0.055171948
4: 2024-02-04 0.00 0.05462935 0.054629350
5: 2024-02-05 0.00 0.05435240 0.054352396
---
101: 2024-05-11 0.03 0.05726587 0.027265872
102: 2024-05-12 0.00 0.05773654 0.057736539
103: 2024-05-13 0.06 0.05737257 0.002627426
104: 2024-05-14 0.07 0.05818364 0.011816364
105: 2024-05-15 0.08 0.05746433 0.022535670
Similar to hour 5, our model also couldn't improved in lm_hour_5.
# Filter data for hour 5
hour_5_data <- all_data[all_data$hour == 5, ]
hour_5_data <- hour_5_data[,-c(2)]
hour_5_data[, lag_1_production := shift(production,1)]
hour_5_data[,lag_1_diff:=production-lag_1_production]
hour_5_data <- hour_5_data[!is.na(lag_1_production)]
hour_5_data$trend_hour_5 <- 1:nrow(hour_5_data)
# Fit linear regression model for hour 5
lm_hour_5 <- lm(production ~+lag_1_production+dlwrf_surface+is.ramadan+special_period+trend_hour_5 +month*hourly_max_t, data = hour_5_data)
# Summarize the model
summary(lm_hour_5)
checkresiduals(lm_hour_5)
plot(lm_hour_5)
Call:
lm(formula = production ~ +lag_1_production + dlwrf_surface +
is.ramadan + special_period + trend_hour_5 + month * hourly_max_t,
data = hour_5_data)
Residuals:
Min 1Q Median 3Q Max
-0.97514 -0.03869 -0.00864 0.02658 1.76482
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.507e+00 2.805e+00 -0.537 0.5912
lag_1_production 5.048e-01 3.000e-02 16.826 < 2e-16 ***
dlwrf_surface 3.991e-04 2.609e-04 1.530 0.1265
is.ramadan -4.754e-02 2.091e-02 -2.273 0.0233 *
special_period -7.987e-02 1.669e-02 -4.785 2.02e-06 ***
trend_hour_5 1.222e-04 2.369e-05 5.157 3.13e-07 ***
monthAra 2.776e+00 3.181e+00 0.873 0.3830
monthEki 3.291e+00 3.214e+00 1.024 0.3061
monthEyl 2.167e+00 3.253e+00 0.666 0.5054
monthHaz -5.195e+00 4.423e+00 -1.175 0.2405
monthKas 3.170e+00 3.191e+00 0.994 0.3207
monthMar 2.567e+00 2.860e+00 0.898 0.3697
monthMay 4.199e+00 3.186e+00 1.318 0.1879
monthNis -8.816e-01 3.015e+00 -0.292 0.7701
monthOca 3.002e+00 2.879e+00 1.043 0.2974
monthŞub 2.648e+00 2.868e+00 0.923 0.3561
monthTem -2.084e+00 3.595e+00 -0.580 0.5623
hourly_max_t 5.028e-03 9.818e-03 0.512 0.6087
monthAra:hourly_max_t -1.029e-02 1.122e-02 -0.917 0.3595
monthEki:hourly_max_t -1.184e-02 1.127e-02 -1.051 0.2936
monthEyl:hourly_max_t -7.717e-03 1.138e-02 -0.678 0.4977
monthHaz:hourly_max_t 1.857e-02 1.551e-02 1.197 0.2315
monthKas:hourly_max_t -1.158e-02 1.123e-02 -1.031 0.3027
monthMar:hourly_max_t -9.429e-03 1.000e-02 -0.943 0.3460
monthMay:hourly_max_t -1.502e-02 1.117e-02 -1.345 0.1791
monthNis:hourly_max_t 3.273e-03 1.055e-02 0.310 0.7565
monthOca:hourly_max_t -1.111e-02 1.008e-02 -1.103 0.2706
monthŞub:hourly_max_t -9.825e-03 1.004e-02 -0.979 0.3280
monthTem:hourly_max_t 7.583e-03 1.256e-02 0.604 0.5463
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1276 on 836 degrees of freedom
Multiple R-squared: 0.6002, Adjusted R-squared: 0.5868
F-statistic: 44.83 on 28 and 836 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 32 data: Residuals LM test = 247.14, df = 32, p-value < 2.2e-16
#Production data for hour 5
hour_5_data <- all_data[all_data$hour == 5, ]
# Plot production for hour 4
ggplot(hour_5_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 5",
x = "Date",
y = "Production") +
theme_minimal()
hour_5_data <- all_data %>%
filter(hour == 5, date >= start_date & date <= end_date)
forecast_data_hour_5 <- to_be_forecasted %>%
filter(hour == 5)
forecast_data_hour_5$trend_hour_5 <- nrow(hour_5_data) + 1:nrow(forecast_data_hour_5)
last_production_value <- tail(hour_5_data$production, 1)
forecast_data_hour_5 <- forecast_data_hour_5 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_5 <- predict(lm_hour_5, newdata = forecast_data_hour_5)
predictions_hour_5[predictions_hour_5 < 0] <- 0
predictions_hour_5_df <- data.frame(
date = forecast_data_hour_5$date,
predicted = predictions_hour_5
)
merged_data <- merge(hour_5_data, predictions_hour_5_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_5_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 154.103457098309 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.00 0.3766761 0.3766761
2: 2024-02-02 0.00 0.3754115 0.3754115
3: 2024-02-03 0.00 0.3770272 0.3770272
4: 2024-02-04 0.00 0.3780127 0.3780127
5: 2024-02-05 0.00 0.3854003 0.3854003
---
101: 2024-05-11 0.64 0.4225375 0.2174625
102: 2024-05-12 0.00 0.4543419 0.4543419
103: 2024-05-13 0.76 0.4710763 0.2889237
104: 2024-05-14 0.96 0.4800365 0.4799635
105: 2024-05-15 0.84 0.4798892 0.3601108
As the number of data we have at hand increases, our regression models started to perform better,according to wmape levels.
# Filter data for hour 6
hour_6_data <- all_data[all_data$hour == 6, ]
hour_6_data <- hour_6_data[, -c(2)]
# Convert the data frame to a data.table
setDT(hour_6_data)
# Create a lagged variable for production with a lag of 1 period
hour_6_data[, lag_1_production := shift(production,1)]
hour_6_data[,lag_1_diff:=production-lag_1_production]
hour_6_data <- hour_6_data[!is.na(lag_1_production)]
hour_6_data$trend_hour_6 <- 1:nrow(hour_6_data)
# Fit linear regression model for hour 6
lm_hour_6 <- lm(production~+lag_1_production+uswrf_top_of_atmosphere+wday+ hourly_cloud_average+is.ramadan+special_period+is.religousday+month*hourly_max_t, data = hour_6_data)
#lm_hour_6 <- lm(production ~ +uswrf_top_of_atmosphere + is.ramadan +is.weekend+ is.religousday +is.publicholiday + month*hourly_max_t , data = hour_6_data)
# Summarize the model
summary(lm_hour_6)
checkresiduals(lm_hour_6)
plot(lm_hour_6)
Call:
lm(formula = production ~ +lag_1_production + uswrf_top_of_atmosphere +
wday + hourly_cloud_average + is.ramadan + special_period +
is.religousday + month * hourly_max_t, data = hour_6_data)
Residuals:
Min 1Q Median 3Q Max
-2.5222 -0.2081 -0.0312 0.1482 4.4413
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.368e+00 1.318e+01 -0.104 0.9173
lag_1_production 4.266e-01 3.218e-02 13.256 <2e-16 ***
uswrf_top_of_atmosphere 5.519e-02 3.712e-02 1.487 0.1375
wdayCum -1.306e-01 7.841e-02 -1.666 0.0962 .
wdayÇar 4.254e-02 7.888e-02 0.539 0.5898
wdayPaz -1.408e-01 7.828e-02 -1.799 0.0724 .
wdayPer -7.452e-02 7.865e-02 -0.947 0.3437
wdayPts -3.051e-02 7.888e-02 -0.387 0.6990
wdaySal -1.332e-01 7.870e-02 -1.693 0.0909 .
hourly_cloud_average -2.392e-03 9.442e-04 -2.533 0.0115 *
is.ramadan -1.829e-01 9.955e-02 -1.837 0.0666 .
special_period -7.515e-01 7.904e-02 -9.508 <2e-16 ***
is.religousday 1.018e-01 1.330e-01 0.766 0.4440
monthAra -3.779e+00 1.518e+01 -0.249 0.8035
monthEki 9.220e+00 1.524e+01 0.605 0.5453
monthEyl 4.213e-01 1.545e+01 0.027 0.9783
monthHaz 9.651e+00 2.127e+01 0.454 0.6502
monthKas 3.229e+00 1.518e+01 0.213 0.8316
monthMar -1.643e+00 1.354e+01 -0.121 0.9035
monthMay 2.190e+01 1.520e+01 1.441 0.1500
monthNis -1.766e+01 1.435e+01 -1.231 0.2187
monthOca -3.706e-02 1.362e+01 -0.003 0.9978
monthŞub -1.313e-01 1.358e+01 -0.010 0.9923
monthTem 1.194e+00 1.731e+01 0.069 0.9450
hourly_max_t 8.200e-03 4.598e-02 0.178 0.8585
monthAra:hourly_max_t 1.123e-02 5.364e-02 0.209 0.8343
monthEki:hourly_max_t -3.356e-02 5.350e-02 -0.627 0.5306
monthEyl:hourly_max_t -1.327e-03 5.413e-02 -0.025 0.9804
monthHaz:hourly_max_t -3.341e-02 7.440e-02 -0.449 0.6534
monthKas:hourly_max_t -1.354e-02 5.348e-02 -0.253 0.8002
monthMar:hourly_max_t 4.398e-03 4.739e-02 0.093 0.9261
monthMay:hourly_max_t -7.962e-02 5.342e-02 -1.491 0.1365
monthNis:hourly_max_t 6.322e-02 5.027e-02 1.258 0.2089
monthOca:hourly_max_t -2.352e-03 4.774e-02 -0.049 0.9607
monthŞub:hourly_max_t -1.929e-03 4.758e-02 -0.041 0.9677
monthTem:hourly_max_t -2.952e-03 6.055e-02 -0.049 0.9611
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6133 on 829 degrees of freedom
Multiple R-squared: 0.6422, Adjusted R-squared: 0.6271
F-statistic: 42.52 on 35 and 829 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 39 data: Residuals LM test = 196.53, df = 39, p-value < 2.2e-16
hour_6_data <- all_data[all_data$hour == 6, ]
# Plot production for hour 6
ggplot(hour_6_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 6",
x = "Date",
y = "Production") +
theme_minimal()
hour_6_data <- all_data %>%
filter(hour == 6, date >= start_date & date <= end_date)
forecast_data_hour_6 <- to_be_forecasted %>%
filter(hour == 6)
forecast_data_hour_6$trend_hour_6 <- nrow(hour_6_data) + 1:nrow(forecast_data_hour_6)
last_production_value <- tail(hour_6_data$production, 1)
forecast_data_hour_6 <- forecast_data_hour_6 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_6 <- predict(lm_hour_6, newdata = forecast_data_hour_6)
predictions_hour_6[predictions_hour_6 < 0] <- 0
predictions_hour_6_df <- data.frame(
date = forecast_data_hour_6$date,
predicted = predictions_hour_6
)
merged_data <- merge(hour_6_data, predictions_hour_6_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_6_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 66.2971330635574 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.01 1.302795 1.2927954
2: 2024-02-02 0.01 1.290989 1.2809892
3: 2024-02-03 0.01 1.375376 1.3653757
4: 2024-02-04 0.07 1.148062 1.0780617
5: 2024-02-05 0.00 1.209222 1.2092218
---
101: 2024-05-11 2.51 1.661629 0.8483713
102: 2024-05-12 0.00 1.599317 1.5993171
103: 2024-05-13 2.69 1.814346 0.8756536
104: 2024-05-14 2.59 1.908270 0.6817298
105: 2024-05-15 2.94 2.044396 0.8956045
As we analyze hour 7, we observed significant improvement in the performance of our forecast with WMAPE level of 29.7652596829167 %. Since we have necessary amount of data, our models also improved as expected. In addition, oır residuals started to fit betteer to QQ plot of standardized residuals vs theoretical quantiles plot. Similar calculations and processes were also followed for other hours.
library(data.table)
# Filter data for hour 7 and remove the hour column
hour_7_data <- all_data[all_data$hour == 7, ]
hour_7_data <- hour_7_data[, -c(2)]
# Create a trend variable for hour 7
hour_7_data$trend_hour_7 <- 1:nrow(hour_7_data)
# Convert the data frame to a data.table
setDT(hour_7_data)
# Create a lagged variable for production with a lag of 1 period
hour_7_data[, lag_1_production := shift(production,1)]
hour_7_data[,lag_1_diff:=production-lag_1_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_7_data <- hour_7_data[!is.na(lag_1_production)]
# Fit linear regression model for hour 7 including the lagged variable
lm_hour_7 <- lm(production ~+lag_1_production+trend_hour_7 + special_period + dlwrf_surface + hourly_cloud_average + month+hourly_max_t,data=hour_7_data)
summary(lm_hour_7)
# Check residuals
library(forecast)
checkresiduals(lm_hour_7)
# Plot the model
plot(lm_hour_7)
Call:
lm(formula = production ~ +lag_1_production + trend_hour_7 +
special_period + dlwrf_surface + hourly_cloud_average + month +
hourly_max_t, data = hour_7_data)
Residuals:
Min 1Q Median 3Q Max
-5.4844 -0.6254 0.0878 0.7845 9.0720
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.009e+01 5.698e+00 -3.526 0.000444 ***
lag_1_production 2.783e-01 3.120e-02 8.918 < 2e-16 ***
trend_hour_7 6.333e-04 2.351e-04 2.693 0.007215 **
special_period -5.767e-01 1.690e-01 -3.413 0.000673 ***
dlwrf_surface -1.845e-02 4.190e-03 -4.402 1.21e-05 ***
hourly_cloud_average -8.618e-03 3.471e-03 -2.483 0.013218 *
monthAra -1.474e+00 3.846e-01 -3.834 0.000135 ***
monthEki 7.961e-01 3.071e-01 2.592 0.009700 **
monthEyl 7.637e-01 2.710e-01 2.819 0.004937 **
monthHaz 1.155e+00 2.637e-01 4.378 1.35e-05 ***
monthKas -5.118e-01 3.473e-01 -1.474 0.140955
monthMar 1.082e-01 3.813e-01 0.284 0.776751
monthMay 5.781e-01 2.890e-01 2.001 0.045751 *
monthNis 6.890e-01 3.066e-01 2.247 0.024893 *
monthOca -1.459e+00 4.083e-01 -3.575 0.000370 ***
monthŞub -6.433e-01 4.173e-01 -1.542 0.123544
monthTem 1.014e+00 2.601e-01 3.901 0.000104 ***
hourly_max_t 9.756e-02 2.323e-02 4.200 2.95e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.381 on 847 degrees of freedom
Multiple R-squared: 0.6439, Adjusted R-squared: 0.6367
F-statistic: 90.08 on 17 and 847 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 21 data: Residuals LM test = 65.049, df = 21, p-value = 2.136e-06
# Plot production for hour 7
ggplot(hour_7_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 7",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_7_data <- all_data %>%
filter(hour == 7, date >= start_date & date <= end_date)
forecast_data_hour_7 <- to_be_forecasted %>%
filter(hour == 7)
forecast_data_hour_7$trend_hour_7 <- nrow(hour_7_data) + 1:nrow(forecast_data_hour_7)
last_production_value <- tail(hour_7_data$production, 1)
forecast_data_hour_7 <- forecast_data_hour_7 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_7 <- predict(lm_hour_7, newdata = forecast_data_hour_7)
predictions_hour_7[predictions_hour_7 < 0] <- 0
predictions_hour_7_df <- data.frame(
date = forecast_data_hour_7$date,
predicted = predictions_hour_7
)
merged_data <- merge(hour_7_data, predictions_hour_7_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_7_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 29.7652596829167 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 1.59 3.143221 1.5532210
2: 2024-02-02 1.62 3.556651 1.9366508
3: 2024-02-03 0.94 3.054084 2.1140840
4: 2024-02-04 1.03 2.487699 1.4576990
5: 2024-02-05 0.88 1.745296 0.8652960
---
101: 2024-05-11 6.27 4.955730 1.3142697
102: 2024-05-12 0.00 3.903619 3.9036191
103: 2024-05-13 5.75 3.460671 2.2893287
104: 2024-05-14 4.14 3.880553 0.2594466
105: 2024-05-15 6.58 4.327481 2.2525187
# Filter data for hour 8
hour_8_data <- all_data[all_data$hour == 8, ]
hour_8_data <- hour_8_data[,-c(2)]
hour_8_data$trend_hour_8 <- 1:nrow(hour_8_data)
# Create a lagged variable for production with a lag of 1 period
hour_8_data[, lag_1_production := shift(production,1)]
hour_8_data[,lag_1_diff:=production-lag_1_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_8_data <- hour_8_data[!is.na(lag_1_production)]
#Fit linear regression model for hour 8
lm_hour_8 <- lm(production ~ +lag_1_production+trend_hour_8+ csnow_surface+dlwrf_surface+hourly_cloud_average+is.ramadan+month*hourly_max_t, data = hour_8_data)
# Summarize the model
summary(lm_hour_8)
checkresiduals(lm_hour_8)
plot(lm_hour_8)
Call:
lm(formula = production ~ +lag_1_production + trend_hour_8 +
csnow_surface + dlwrf_surface + hourly_cloud_average + is.ramadan +
month * hourly_max_t, data = hour_8_data)
Residuals:
Min 1Q Median 3Q Max
-7.7347 -0.9508 0.2114 1.0988 5.7171
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.007e+00 3.530e+01 0.085 0.93214
lag_1_production 1.645e-01 2.916e-02 5.640 2.33e-08 ***
trend_hour_8 1.089e-03 2.785e-04 3.910 9.96e-05 ***
csnow_surface -1.217e+00 4.449e-01 -2.736 0.00636 **
dlwrf_surface -2.800e-02 5.213e-03 -5.373 1.01e-07 ***
hourly_cloud_average -2.446e-02 4.726e-03 -5.174 2.86e-07 ***
is.ramadan -6.163e-01 3.035e-01 -2.031 0.04259 *
monthAra -6.920e+01 4.174e+01 -1.658 0.09774 .
monthEki -2.122e+01 4.112e+01 -0.516 0.60601
monthEyl 2.384e+01 4.017e+01 0.593 0.55305
monthHaz -1.170e+01 4.634e+01 -0.252 0.80072
monthKas -2.417e+01 4.048e+01 -0.597 0.55073
monthMar -3.480e+01 3.654e+01 -0.952 0.34121
monthMay -5.104e+00 3.892e+01 -0.131 0.89569
monthNis -1.825e+01 3.752e+01 -0.486 0.62684
monthOca -4.946e+01 3.652e+01 -1.354 0.17605
monthŞub -3.804e+01 3.634e+01 -1.047 0.29550
monthTem -1.275e+01 4.553e+01 -0.280 0.77950
hourly_max_t 3.750e-02 1.191e-01 0.315 0.75291
monthAra:hourly_max_t 2.434e-01 1.438e-01 1.692 0.09107 .
monthEki:hourly_max_t 7.415e-02 1.398e-01 0.531 0.59584
monthEyl:hourly_max_t -8.233e-02 1.353e-01 -0.608 0.54312
monthHaz:hourly_max_t 4.065e-02 1.564e-01 0.260 0.79500
monthKas:hourly_max_t 8.094e-02 1.383e-01 0.585 0.55858
monthMar:hourly_max_t 1.242e-01 1.233e-01 1.007 0.31409
monthMay:hourly_max_t 1.699e-02 1.313e-01 0.129 0.89707
monthNis:hourly_max_t 6.320e-02 1.264e-01 0.500 0.61721
monthOca:hourly_max_t 1.710e-01 1.235e-01 1.385 0.16638
monthŞub:hourly_max_t 1.333e-01 1.227e-01 1.086 0.27767
monthTem:hourly_max_t 4.460e-02 1.531e-01 0.291 0.77096
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.818 on 835 degrees of freedom
Multiple R-squared: 0.5892, Adjusted R-squared: 0.5749
F-statistic: 41.29 on 29 and 835 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 33 data: Residuals LM test = 68.172, df = 33, p-value = 0.0003044
# Plot production for hour 8
ggplot(hour_8_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 8",
x = "Date",
y = "Production") +
theme_minimal()
hour_8_data <- all_data %>%
filter(hour == 8, date >= start_date & date <= end_date)
forecast_data_hour_8 <- to_be_forecasted %>%
filter(hour == 8)
forecast_data_hour_8$trend_hour_8 <- nrow(hour_8_data) + 1:nrow(forecast_data_hour_8)
last_production_value <- tail(hour_8_data$production, 1)
forecast_data_hour_8 <- forecast_data_hour_8 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_8 <- predict(lm_hour_8, newdata = forecast_data_hour_8)
predictions_hour_8[predictions_hour_8 < 0] <- 0
predictions_hour_8_df <- data.frame(
date = forecast_data_hour_8$date,
predicted = predictions_hour_8
)
merged_data <- merge(hour_8_data, predictions_hour_8_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_8_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 22.6917817588993 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 6.17 5.694100 0.4759005
2: 2024-02-02 6.15 6.627810 0.4778098
3: 2024-02-03 4.21 5.471073 1.2610726
4: 2024-02-04 5.04 4.426163 0.6138374
5: 2024-02-05 3.01 2.770403 0.2395970
---
101: 2024-05-11 9.07 7.128347 1.9416530
102: 2024-05-12 0.00 5.323364 5.3233640
103: 2024-05-13 9.24 4.310057 4.9299429
104: 2024-05-14 7.60 5.473529 2.1264705
105: 2024-05-15 9.13 5.993096 3.1369043
For hour 8, although there are still higher instances of lags in the residual ACF plot, no additional lag parameter was added to prevent multicollinearity.Similar assumption was also followed with other hours.
# Filter data for hour 9
hour_9_data <- all_data[all_data$hour == 9, ]
hour_9_data <- hour_9_data[,-c(2)]
hour_9_data$trend_hour_9 <- 1:nrow(hour_9_data)
hour_9_data[, lag_1_production := shift(production,1)]
hour_9_data[,lag_1_diff:=production-lag_1_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_9_data <- hour_9_data[!is.na(lag_1_production)]
# Fit linear regression model for hour 9
lm_hour_9 <- lm(production ~+lag_1_production+special_period+dswrf_surface+csnow_surface+dlwrf_surface+is.nationalday+uswrf_surface+hourly_cloud_average+month*hourly_max_t, data = hour_9_data)
# Summarize the model
summary(lm_hour_9)
checkresiduals(lm_hour_9)
plot(lm_hour_9)
Call:
lm(formula = production ~ +lag_1_production + special_period +
dswrf_surface + csnow_surface + dlwrf_surface + is.nationalday +
uswrf_surface + hourly_cloud_average + month * hourly_max_t,
data = hour_9_data)
Residuals:
Min 1Q Median 3Q Max
-8.3796 -1.0059 0.2701 1.2028 5.2072
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.673e+01 3.908e+01 -0.684 0.49423
lag_1_production 8.335e-02 2.831e-02 2.945 0.00332 **
special_period -3.158e-01 2.134e-01 -1.480 0.13923
dswrf_surface -3.688e-02 1.474e-02 -2.502 0.01255 *
csnow_surface -1.383e+00 5.236e-01 -2.641 0.00841 **
dlwrf_surface -3.795e-02 6.159e-03 -6.161 1.12e-09 ***
is.nationalday 1.015e+00 5.652e-01 1.796 0.07284 .
uswrf_surface 1.028e-01 4.429e-02 2.320 0.02058 *
hourly_cloud_average -2.812e-02 5.303e-03 -5.304 1.45e-07 ***
monthAra -1.102e+02 4.719e+01 -2.334 0.01982 *
monthEki -3.741e+01 4.523e+01 -0.827 0.40833
monthEyl 2.276e+01 4.286e+01 0.531 0.59555
monthHaz -2.816e+01 4.784e+01 -0.589 0.55618
monthKas -1.590e+01 4.274e+01 -0.372 0.70990
monthMar -3.979e+01 4.214e+01 -0.944 0.34530
monthMay -1.751e+01 4.198e+01 -0.417 0.67663
monthNis -1.645e+01 4.081e+01 -0.403 0.68701
monthOca -7.982e+01 4.042e+01 -1.975 0.04860 *
monthŞub -2.391e+01 4.025e+01 -0.594 0.55266
monthTem 2.289e+01 4.695e+01 0.488 0.62600
hourly_max_t 1.581e-01 1.299e-01 1.217 0.22396
monthAra:hourly_max_t 3.981e-01 1.604e-01 2.481 0.01329 *
monthEki:hourly_max_t 1.316e-01 1.506e-01 0.874 0.38220
monthEyl:hourly_max_t -7.465e-02 1.411e-01 -0.529 0.59699
monthHaz:hourly_max_t 1.012e-01 1.584e-01 0.639 0.52314
monthKas:hourly_max_t 5.577e-02 1.424e-01 0.392 0.69550
monthMar:hourly_max_t 1.448e-01 1.402e-01 1.032 0.30227
monthMay:hourly_max_t 6.412e-02 1.388e-01 0.462 0.64420
monthNis:hourly_max_t 5.935e-02 1.347e-01 0.441 0.65955
monthOca:hourly_max_t 2.893e-01 1.342e-01 2.156 0.03139 *
monthŞub:hourly_max_t 8.869e-02 1.334e-01 0.665 0.50621
monthTem:hourly_max_t -7.121e-02 1.546e-01 -0.461 0.64511
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.981 on 833 degrees of freedom
Multiple R-squared: 0.5639, Adjusted R-squared: 0.5476
F-statistic: 34.74 on 31 and 833 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 35 data: Residuals LM test = 20.373, df = 35, p-value = 0.9769
# Plot production for hour 9
ggplot(hour_9_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 9",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_9_data <- all_data %>%
filter(hour == 9, date >= start_date & date <= end_date)
forecast_data_hour_9 <- to_be_forecasted %>%
filter(hour == 9)
forecast_data_hour_9$trend_hour_9 <- nrow(hour_9_data) + 1:nrow(forecast_data_hour_9)
last_production_value <- tail(hour_9_data$production, 1)
forecast_data_hour_9 <- forecast_data_hour_9 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_9 <- predict(lm_hour_9, newdata = forecast_data_hour_9)
predictions_hour_9[predictions_hour_9 < 0] <- 0
predictions_hour_9_df <- data.frame(
date = forecast_data_hour_9$date,
predicted = predictions_hour_9
)
merged_data <- merge(hour_9_data, predictions_hour_9_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_9_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 19.5185218600221 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 7.11 8.060603 0.9506034
2: 2024-02-02 9.03 9.322309 0.2923085
3: 2024-02-03 6.83 7.574831 0.7448312
4: 2024-02-04 9.02 6.479980 2.5400196
5: 2024-02-05 4.21 4.660045 0.4500446
---
101: 2024-05-11 9.85 8.116059 1.7339413
102: 2024-05-12 0.00 6.053854 6.0538535
103: 2024-05-13 8.88 4.573623 4.3063768
104: 2024-05-14 8.83 5.654152 3.1758482
105: 2024-05-15 9.82 6.814517 3.0054834
# Create a trend variable for hour 10
hour_10_data <- all_data[all_data$hour == 10, ]
hour_10_data <- hour_10_data[,-c(2)]
hour_10_data$trend_hour_10 <- 1:nrow(hour_10_data)
hour_10_data[, lag_1_production := shift(production,1)]
hour_10_data[,lag_1_diff:=production-lag_1_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_10_data <- hour_10_data[!is.na(lag_1_production)]
# Fit linear regression model for hour 10
lm_hour_10 <- lm(production ~+lag_1_production +csnow_surface+dlwrf_surface+hourly_cloud_average+is.weekend+is.religousday+is.nationalday+month*hourly_max_t, data = hour_10_data)
summary(lm_hour_10)
checkresiduals(lm_hour_10)
plot(lm_hour_10)
pacf(hour_10_data$production)
Call:
lm(formula = production ~ +lag_1_production + csnow_surface +
dlwrf_surface + hourly_cloud_average + is.weekend + is.religousday +
is.nationalday + month * hourly_max_t, data = hour_10_data)
Residuals:
Min 1Q Median 3Q Max
-9.8962 -0.6891 0.2512 1.0714 5.4454
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.762e+01 3.911e+01 -0.706 0.48024
lag_1_production 7.410e-02 2.789e-02 2.657 0.00804 **
csnow_surface -2.278e+00 5.748e-01 -3.964 8.01e-05 ***
dlwrf_surface -3.159e-02 4.756e-03 -6.643 5.55e-11 ***
hourly_cloud_average -2.114e-02 5.198e-03 -4.067 5.21e-05 ***
is.weekend -1.860e-01 1.552e-01 -1.198 0.23113
is.religousday 8.104e-01 4.378e-01 1.851 0.06450 .
is.nationalday 7.260e-01 5.847e-01 1.242 0.21470
monthAra -1.330e+02 4.784e+01 -2.780 0.00556 **
monthEki -2.118e+01 4.538e+01 -0.467 0.64071
monthEyl 3.466e+00 4.299e+01 0.081 0.93575
monthHaz -4.052e+01 4.656e+01 -0.870 0.38434
monthKas -1.036e+01 4.223e+01 -0.245 0.80617
monthMar 1.790e+01 4.028e+01 0.445 0.65679
monthMay -3.094e+00 4.168e+01 -0.074 0.94084
monthNis 3.873e+00 4.079e+01 0.095 0.92437
monthOca -7.934e+01 4.127e+01 -1.922 0.05490 .
monthŞub 1.043e+01 4.058e+01 0.257 0.79716
monthTem 1.904e+01 4.619e+01 0.412 0.68038
hourly_max_t 1.517e-01 1.266e-01 1.199 0.23102
monthAra:hourly_max_t 4.782e-01 1.600e-01 2.989 0.00288 **
monthEki:hourly_max_t 7.438e-02 1.483e-01 0.501 0.61616
monthEyl:hourly_max_t -1.009e-02 1.390e-01 -0.073 0.94216
monthHaz:hourly_max_t 1.358e-01 1.514e-01 0.897 0.36997
monthKas:hourly_max_t 3.748e-02 1.378e-01 0.272 0.78568
monthMar:hourly_max_t -5.926e-02 1.306e-01 -0.454 0.65010
monthMay:hourly_max_t 1.042e-02 1.351e-01 0.077 0.93855
monthNis:hourly_max_t -1.247e-02 1.320e-01 -0.094 0.92475
monthOca:hourly_max_t 2.886e-01 1.349e-01 2.140 0.03267 *
monthŞub:hourly_max_t -3.244e-02 1.320e-01 -0.246 0.80598
monthTem:hourly_max_t -6.103e-02 1.496e-01 -0.408 0.68333
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.049 on 834 degrees of freedom
Multiple R-squared: 0.5422, Adjusted R-squared: 0.5257
F-statistic: 32.92 on 30 and 834 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 34 data: Residuals LM test = 72.303, df = 34, p-value = 0.0001407
# Plot production for hour 10
ggplot(hour_10_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 10",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_10_data <- all_data %>%
filter(hour == 10, date >= start_date & date <= end_date)
forecast_data_hour_10 <- to_be_forecasted %>%
filter(hour == 10)
forecast_data_hour_10$trend_hour_10 <- nrow(hour_10_data) + 1:nrow(forecast_data_hour_10)
last_production_value <- tail(hour_10_data$production, 1)
forecast_data_hour_10 <- forecast_data_hour_10 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_10 <- predict(lm_hour_10, newdata = forecast_data_hour_10)
predictions_hour_10[predictions_hour_10 < 0] <- 0
predictions_hour_10_df <- data.frame(
date = forecast_data_hour_10$date,
predicted = predictions_hour_10
)
merged_data <- merge(hour_10_data, predictions_hour_10_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_10_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 17.1336671941216 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 5.20 9.103181 3.90318118
2: 2024-02-02 9.85 9.791868 0.05813237
3: 2024-02-03 6.11 7.154666 1.04466562
4: 2024-02-04 9.83 7.315851 2.51414880
5: 2024-02-05 6.69 6.172011 0.51798898
---
101: 2024-05-11 9.54 8.404196 1.13580386
102: 2024-05-12 0.00 6.531169 6.53116888
103: 2024-05-13 7.85 4.705113 3.14488672
104: 2024-05-14 7.44 7.697921 0.25792078
105: 2024-05-15 9.87 8.350045 1.51995509
# Create a trend variable for hour 11
hour_11_data <- all_data[all_data$hour == 11, ]
hour_11_data <- hour_11_data[,-c(2)]
hour_11_data$trend_hour_11 <- 1:nrow(hour_11_data)
hour_11_data[, lag_19_production := shift(production,19)]
hour_11_data[,lag_19_diff:=production-lag_19_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_11_data <- hour_11_data[!is.na(lag_19_production)]
# Fit linear regression model for hour 11
lm_hour_11 <- lm(production ~+lag_19_production+trend_hour_11+special_period+csnow_surface+dlwrf_surface+tmp_surface+hourly_cloud_average+is.weekend+is.religousday+is.nationalday+month*hourly_max_t, data = hour_11_data)
summary(lm_hour_11)
checkresiduals(lm_hour_11)
plot(lm_hour_11)
Call:
lm(formula = production ~ +lag_19_production + trend_hour_11 +
special_period + csnow_surface + dlwrf_surface + tmp_surface +
hourly_cloud_average + is.weekend + is.religousday + is.nationalday +
month * hourly_max_t, data = hour_11_data)
Residuals:
Min 1Q Median 3Q Max
-8.7618 -0.6632 0.2996 1.0666 6.7623
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.716e+01 3.824e+01 -2.018 0.04394 *
lag_19_production -8.089e-02 2.593e-02 -3.120 0.00187 **
trend_hour_11 -5.759e-04 3.821e-04 -1.507 0.13214
special_period 3.920e-01 2.550e-01 1.537 0.12472
csnow_surface -1.899e+00 5.783e-01 -3.285 0.00106 **
dlwrf_surface -3.286e-02 4.517e-03 -7.276 8.12e-13 ***
tmp_surface 3.146e-01 1.221e-01 2.577 0.01014 *
hourly_cloud_average -1.540e-02 5.328e-03 -2.891 0.00394 **
is.weekend -1.661e-01 1.554e-01 -1.069 0.28525
is.religousday 1.466e-01 4.444e-01 0.330 0.74160
is.nationalday 2.450e-01 5.779e-01 0.424 0.67173
monthAra -7.152e+01 4.482e+01 -1.596 0.11094
monthEki 3.889e+01 4.401e+01 0.884 0.37711
monthEyl 3.329e+01 4.175e+01 0.797 0.42550
monthHaz 3.922e+01 4.427e+01 0.886 0.37591
monthKas 1.830e+01 4.073e+01 0.449 0.65327
monthMar 6.715e+01 3.935e+01 1.707 0.08828 .
monthMay 4.722e+01 4.049e+01 1.166 0.24381
monthNis 4.275e+01 3.974e+01 1.076 0.28237
monthOca -3.682e+01 4.078e+01 -0.903 0.36678
monthŞub 7.621e+01 3.968e+01 1.921 0.05513 .
monthTem 4.128e+01 4.461e+01 0.925 0.35500
hourly_max_t NA NA NA NA
monthAra:hourly_max_t 2.730e-01 1.471e-01 1.856 0.06384 .
monthEki:hourly_max_t -1.202e-01 1.419e-01 -0.847 0.39706
monthEyl:hourly_max_t -1.036e-01 1.332e-01 -0.778 0.43658
monthHaz:hourly_max_t -1.240e-01 1.421e-01 -0.873 0.38299
monthKas:hourly_max_t -4.877e-02 1.309e-01 -0.372 0.70962
monthMar:hourly_max_t -2.166e-01 1.259e-01 -1.721 0.08572 .
monthMay:hourly_max_t -1.520e-01 1.295e-01 -1.174 0.24087
monthNis:hourly_max_t -1.345e-01 1.269e-01 -1.059 0.28970
monthOca:hourly_max_t 1.516e-01 1.318e-01 1.150 0.25041
monthŞub:hourly_max_t -2.490e-01 1.274e-01 -1.955 0.05098 .
monthTem:hourly_max_t -1.296e-01 1.426e-01 -0.909 0.36342
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.03 on 814 degrees of freedom
Multiple R-squared: 0.5386, Adjusted R-squared: 0.5204
F-statistic: 29.69 on 32 and 814 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 37 data: Residuals LM test = 59.296, df = 37, p-value = 0.01143
# Plot production for hour 11
ggplot(hour_11_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 11",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_11_data <- all_data %>%
filter(hour == 11, date >= start_date & date <= end_date)
forecast_data_hour_11 <- to_be_forecasted %>%
filter(hour == 11)
forecast_data_hour_11$trend_hour_11 <- nrow(hour_11_data) + 1:nrow(forecast_data_hour_11)
last_production_value <- tail(hour_11_data$production, 1)
forecast_data_hour_11 <- forecast_data_hour_11 %>%
mutate(lag_19_production = last_production_value)
predictions_hour_11 <- predict(lm_hour_11, newdata = forecast_data_hour_11)
predictions_hour_11[predictions_hour_11 < 0] <- 0
predictions_hour_11_df <- data.frame(
date = forecast_data_hour_11$date,
predicted = predictions_hour_11
)
merged_data <- merge(hour_11_data, predictions_hour_11_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_11_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 19.6298861504385 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 3.53 9.113857 5.5838567
2: 2024-02-02 9.88 9.598790 0.2812103
3: 2024-02-03 9.09 7.347482 1.7425182
4: 2024-02-04 6.96 7.403292 0.4432921
5: 2024-02-05 4.12 5.898651 1.7786511
---
101: 2024-05-11 5.76 7.810759 2.0507586
102: 2024-05-12 0.00 5.750518 5.7505177
103: 2024-05-13 8.36 4.728926 3.6310744
104: 2024-05-14 5.84 7.389215 1.5492146
105: 2024-05-15 9.82 8.429268 1.3907323
# Create a trend variable for hour 12
hour_12_data <- all_data[all_data$hour == 12, ]
hour_12_data <- hour_12_data[,-c(2)]
hour_12_data[, lag_1_production := shift(production,1)]
hour_12_data[,lag_1_diff:=production-lag_1_production]
hour_12_data <- hour_12_data[!is.na(lag_1_production)]
hour_12_data$trend_hour_12 <- 1:nrow(hour_12_data)
lm_hour_12 <- lm(production ~ +lag_1_production+trend_hour_12+csnow_surface+dlwrf_surface+tmp_surface+hourly_cloud_average+is.weekend+is.religousday+is.nationalday+month*hourly_max_t , data = hour_12_data)
summary(lm_hour_12)
checkresiduals(lm_hour_12)
plot(lm_hour_12)
Call:
lm(formula = production ~ +lag_1_production + trend_hour_12 +
csnow_surface + dlwrf_surface + tmp_surface + hourly_cloud_average +
is.weekend + is.religousday + is.nationalday + month * hourly_max_t,
data = hour_12_data)
Residuals:
Min 1Q Median 3Q Max
-8.5394 -0.7375 0.2502 1.1136 6.5160
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.327e+01 3.850e+01 -1.124 0.26137
lag_1_production 9.071e-02 2.827e-02 3.209 0.00138 **
trend_hour_12 -7.090e-04 3.195e-04 -2.219 0.02677 *
csnow_surface -1.375e+00 5.384e-01 -2.554 0.01081 *
dlwrf_surface -3.082e-02 4.333e-03 -7.113 2.44e-12 ***
tmp_surface 2.000e-01 1.218e-01 1.641 0.10110
hourly_cloud_average -1.622e-02 5.297e-03 -3.062 0.00227 **
is.weekend -3.110e-01 1.531e-01 -2.031 0.04257 *
is.religousday 1.213e-01 4.404e-01 0.275 0.78305
is.nationalday 3.109e-01 5.763e-01 0.540 0.58969
monthAra -4.290e+01 4.372e+01 -0.981 0.32671
monthEki 5.402e+00 4.385e+01 0.123 0.90198
monthEyl 1.075e+01 4.171e+01 0.258 0.79661
monthHaz 1.580e+01 4.355e+01 0.363 0.71694
monthKas -2.455e+00 4.062e+01 -0.060 0.95183
monthMar 3.075e+01 3.939e+01 0.781 0.43513
monthMay 3.139e+01 4.056e+01 0.774 0.43920
monthNis 1.588e+01 3.993e+01 0.398 0.69096
monthOca -4.011e+01 4.033e+01 -0.995 0.32026
monthŞub 4.898e+01 3.964e+01 1.235 0.21701
monthTem 3.862e+01 4.401e+01 0.878 0.38042
hourly_max_t NA NA NA NA
monthAra:hourly_max_t 1.574e-01 1.416e-01 1.112 0.26663
monthEki:hourly_max_t -1.524e-02 1.401e-01 -0.109 0.91336
monthEyl:hourly_max_t -3.368e-02 1.319e-01 -0.255 0.79852
monthHaz:hourly_max_t -5.101e-02 1.386e-01 -0.368 0.71283
monthKas:hourly_max_t 1.198e-02 1.293e-01 0.093 0.92622
monthMar:hourly_max_t -9.976e-02 1.248e-01 -0.799 0.42442
monthMay:hourly_max_t -1.038e-01 1.286e-01 -0.807 0.41968
monthNis:hourly_max_t -5.067e-02 1.265e-01 -0.401 0.68879
monthOca:hourly_max_t 1.482e-01 1.288e-01 1.151 0.25025
monthŞub:hourly_max_t -1.658e-01 1.261e-01 -1.316 0.18864
monthTem:hourly_max_t -1.230e-01 1.394e-01 -0.882 0.37792
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.019 on 833 degrees of freedom
Multiple R-squared: 0.5384, Adjusted R-squared: 0.5212
F-statistic: 31.34 on 31 and 833 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 36 data: Residuals LM test = 32.413, df = 36, p-value = 0.6399
ggplot(hour_12_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 12",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_12_data <- all_data %>%
filter(hour == 12, date >= start_date & date <= end_date)
forecast_data_hour_12 <- to_be_forecasted %>%
filter(hour == 12)
forecast_data_hour_12$trend_hour_12 <- nrow(hour_12_data) + 1:nrow(forecast_data_hour_12)
last_production_value <- tail(hour_12_data$production, 1)
forecast_data_hour_12 <- forecast_data_hour_12 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_12 <- predict(lm_hour_12, newdata = forecast_data_hour_12)
predictions_hour_12[predictions_hour_12 < 0] <- 0
predictions_hour_12_df <- data.frame(
date = forecast_data_hour_12$date,
predicted = predictions_hour_12
)
merged_data <- merge(hour_12_data, predictions_hour_12_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_12_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 24.145872036254 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 4.92 9.065805 4.1458054
2: 2024-02-02 9.85 9.499731 0.3502688
3: 2024-02-03 8.71 7.153421 1.5565785
4: 2024-02-04 9.19 7.395700 1.7942998
5: 2024-02-05 2.85 5.925434 3.0754340
---
101: 2024-05-11 2.25 8.158062 5.9080618
102: 2024-05-12 0.00 5.995392 5.9953918
103: 2024-05-13 5.34 5.967664 0.6276639
104: 2024-05-14 7.78 8.504438 0.7244383
105: 2024-05-15 7.92 8.874813 0.9548128
# Create a trend variable for hour 13
hour_13_data <- all_data[all_data$hour == 13, ]
hour_13_data <- hour_13_data[,-c(2)]
hour_13_data[, lag_1_production := shift(production,1)]
hour_13_data[,lag_1_diff:=production-lag_1_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_13_data <- hour_13_data[!is.na(lag_1_production)]
# Remove rows with missing values in the hourly_max_t column
hour_13_data$trend_hour_13 <- 1:nrow(hour_13_data)
lm_hour_13 <- lm(production ~+lag_1_production +trend_hour_13+dlwrf_surface+tmp_surface+hourly_cloud_average+is.weekend+is.nationalday+is.publicholiday+month * hourly_max_t , data = hour_13_data)
summary(lm_hour_13)
checkresiduals(lm_hour_13)
plot(lm_hour_13)
Call:
lm(formula = production ~ +lag_1_production + trend_hour_13 +
dlwrf_surface + tmp_surface + hourly_cloud_average + is.weekend +
is.nationalday + is.publicholiday + month * hourly_max_t,
data = hour_13_data)
Residuals:
Min 1Q Median 3Q Max
-7.7178 -0.9401 0.3124 1.2178 6.2601
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -8.568e+01 3.993e+01 -2.146 0.032188 *
lag_1_production 9.508e-02 2.783e-02 3.416 0.000666 ***
trend_hour_13 -6.315e-04 3.237e-04 -1.951 0.051391 .
dlwrf_surface -3.221e-02 4.352e-03 -7.401 3.29e-13 ***
tmp_surface 3.333e-01 1.259e-01 2.647 0.008275 **
hourly_cloud_average -2.541e-02 5.556e-03 -4.573 5.54e-06 ***
is.weekend 3.282e+00 1.184e+00 2.773 0.005670 **
is.nationalday 1.910e+00 8.175e-01 2.336 0.019730 *
is.publicholiday -3.571e+00 1.194e+00 -2.991 0.002866 **
monthAra 1.945e+01 4.467e+01 0.435 0.663442
monthEki 7.421e+01 4.535e+01 1.636 0.102127
monthEyl 6.383e+01 4.335e+01 1.472 0.141287
monthHaz 6.102e+01 4.485e+01 1.360 0.174059
monthKas 3.433e+01 4.208e+01 0.816 0.414872
monthMar 6.470e+01 4.067e+01 1.591 0.112032
monthMay 8.486e+01 4.199e+01 2.021 0.043580 *
monthNis 4.363e+01 4.147e+01 1.052 0.293125
monthOca 1.388e+01 4.160e+01 0.334 0.738782
monthŞub 8.158e+01 4.100e+01 1.990 0.046951 *
monthTem 7.787e+01 4.461e+01 1.746 0.081257 .
hourly_max_t NA NA NA NA
monthAra:hourly_max_t -4.841e-02 1.438e-01 -0.337 0.736503
monthEki:hourly_max_t -2.372e-01 1.444e-01 -1.642 0.100914
monthEyl:hourly_max_t -2.004e-01 1.367e-01 -1.467 0.142840
monthHaz:hourly_max_t -1.920e-01 1.423e-01 -1.350 0.177469
monthKas:hourly_max_t -1.038e-01 1.335e-01 -0.777 0.437101
monthMar:hourly_max_t -2.034e-01 1.284e-01 -1.584 0.113635
monthMay:hourly_max_t -2.722e-01 1.328e-01 -2.050 0.040671 *
monthNis:hourly_max_t -1.350e-01 1.310e-01 -1.031 0.302951
monthOca:hourly_max_t -2.678e-02 1.323e-01 -0.202 0.839604
monthŞub:hourly_max_t -2.628e-01 1.298e-01 -2.024 0.043250 *
monthTem:hourly_max_t -2.451e-01 1.408e-01 -1.741 0.082065 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.095 on 834 degrees of freedom
Multiple R-squared: 0.5576, Adjusted R-squared: 0.5417
F-statistic: 35.05 on 30 and 834 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 35 data: Residuals LM test = 56.897, df = 35, p-value = 0.01108
ggplot(hour_13_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 13",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_13_data <- all_data %>%
filter(hour == 13, date >= start_date & date <= end_date)
forecast_data_hour_13 <- to_be_forecasted %>%
filter(hour == 13)
forecast_data_hour_13$trend_hour_13 <- nrow(hour_13_data) + 1:nrow(forecast_data_hour_13)
last_production_value <- tail(hour_13_data$production, 1)
forecast_data_hour_13 <- forecast_data_hour_13 %>%
mutate(lag_1_production = last_production_value)
predictions_hour_13 <- predict(lm_hour_13, newdata = forecast_data_hour_13)
predictions_hour_13[predictions_hour_13 < 0] <- 0
predictions_hour_13_df <- data.frame(
date = forecast_data_hour_13$date,
predicted = predictions_hour_13
)
merged_data <- merge(hour_13_data, predictions_hour_13_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_13_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 23.9313334909365 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 4.90 8.903164 4.0031635
2: 2024-02-02 9.85 9.521604 0.3283964
3: 2024-02-03 8.02 6.644535 1.3754651
4: 2024-02-04 9.00 7.239842 1.7601575
5: 2024-02-05 3.01 5.773566 2.7635661
---
101: 2024-05-11 3.67 8.253055 4.5830546
102: 2024-05-12 0.00 5.725042 5.7250422
103: 2024-05-13 4.04 5.886979 1.8469792
104: 2024-05-14 9.64 8.951888 0.6881117
105: 2024-05-15 7.18 8.784960 1.6049597
hour_14_data <- all_data[all_data$hour == 14, ]
hour_14_data <- hour_14_data[,-c(2)]
hour_14_data$trend_hour_14 <- 1:nrow(hour_14_data)
hour_14_data[, lag_14_production := shift(production,1)]
hour_14_data[,lag_14_diff:=production-lag_14_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_14_data <- hour_14_data[!is.na(lag_14_production)]
lm_hour_14 <- lm(production ~ +lag_14_production+ special_period+dlwrf_surface+tmp_surface+is.weekend+is.ramadan+is.religousday+is.nationalday+is.publicholiday+hourly_cloud_average+month*hourly_max_t , data = hour_14_data)
summary(lm_hour_14)
checkresiduals(lm_hour_14)
plot(lm_hour_14)
Call:
lm(formula = production ~ +lag_14_production + special_period +
dlwrf_surface + tmp_surface + is.weekend + is.ramadan + is.religousday +
is.nationalday + is.publicholiday + hourly_cloud_average +
month * hourly_max_t, data = hour_14_data)
Residuals:
Min 1Q Median 3Q Max
-8.4902 -1.0601 0.2299 1.2149 6.5299
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -49.229332 33.355821 -1.476 0.14035
lag_14_production 0.107539 0.028577 3.763 0.00018 ***
special_period 0.407887 0.218537 1.866 0.06233 .
dlwrf_surface -0.025869 0.004248 -6.090 1.72e-09 ***
tmp_surface 0.208630 0.105379 1.980 0.04805 *
is.weekend 3.323860 1.135289 2.928 0.00351 **
is.ramadan 0.207241 0.329407 0.629 0.52943
is.religousday -0.017660 0.432060 -0.041 0.96741
is.nationalday 1.884202 0.783760 2.404 0.01643 *
is.publicholiday -3.533244 1.145636 -3.084 0.00211 **
hourly_cloud_average -0.030869 0.005373 -5.746 1.28e-08 ***
monthAra 18.715850 38.743513 0.483 0.62917
monthEki 51.540702 39.514496 1.304 0.19248
monthEyl 31.333769 37.469047 0.836 0.40325
monthHaz -1.556742 39.088677 -0.040 0.96824
monthKas 24.578372 35.946040 0.684 0.49432
monthMar 43.898404 34.298011 1.280 0.20093
monthMay 62.696698 35.725713 1.755 0.07964 .
monthNis 27.908032 35.149466 0.794 0.42743
monthOca -5.164791 35.170800 -0.147 0.88329
monthŞub 48.080570 34.544043 1.392 0.16434
monthTem 55.405723 38.165297 1.452 0.14695
hourly_max_t NA NA NA NA
monthAra:hourly_max_t -0.062183 0.125821 -0.494 0.62128
monthEki:hourly_max_t -0.171335 0.126625 -1.353 0.17640
monthEyl:hourly_max_t -0.100101 0.118579 -0.844 0.39882
monthHaz:hourly_max_t 0.008465 0.124695 0.068 0.94589
monthKas:hourly_max_t -0.081753 0.114721 -0.713 0.47628
monthMar:hourly_max_t -0.141418 0.108718 -1.301 0.19370
monthMay:hourly_max_t -0.204053 0.113489 -1.798 0.07254 .
monthNis:hourly_max_t -0.088926 0.111450 -0.798 0.42516
monthOca:hourly_max_t 0.026587 0.112360 0.237 0.81301
monthŞub:hourly_max_t -0.157881 0.109816 -1.438 0.15090
monthTem:hourly_max_t -0.175299 0.120809 -1.451 0.14715
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.012 on 832 degrees of freedom
Multiple R-squared: 0.5738, Adjusted R-squared: 0.5574
F-statistic: 35 on 32 and 832 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 37 data: Residuals LM test = 44.803, df = 37, p-value = 0.1771
ggplot(hour_14_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 14",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
hour_14_data <- all_data %>%
filter(hour == 14, date >= start_date & date <= end_date)
forecast_data_hour_14 <- to_be_forecasted %>%
filter(hour == 14)
forecast_data_hour_14$trend_hour_14 <- nrow(hour_14_data) + 1:nrow(forecast_data_hour_14)
last_production_value <- tail(hour_14_data$production, 1)
forecast_data_hour_14 <- forecast_data_hour_14 %>%
mutate(lag_14_production = last_production_value)
predictions_hour_14 <- predict(lm_hour_14, newdata = forecast_data_hour_14)
predictions_hour_14[predictions_hour_14 < 0] <- 0
predictions_hour_14_df <- data.frame(
date = forecast_data_hour_14$date,
predicted = predictions_hour_14
)
merged_data <- merge(hour_14_data, predictions_hour_14_df, by = "date")
output_data <- merged_data %>%
select(date, actual = production, predicted)
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
print(paste("WMAPE:", wmape, "%"))
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
file_path <- file.path(output_dir, "predictions_hour_14_with_actuals.xlsx")
write_xlsx(output_data, file_path)
print(output_data)
[1] "WMAPE: 25.2414535555969 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 5.78 7.594635 1.8146355
2: 2024-02-02 8.85 8.247988 0.6020120
3: 2024-02-03 4.37 5.269883 0.8998832
4: 2024-02-04 3.25 5.966098 2.7160984
5: 2024-02-05 1.78 4.643374 2.8633739
---
101: 2024-05-11 1.91 7.444803 5.5348028
102: 2024-05-12 0.00 5.197135 5.1971350
103: 2024-05-13 5.89 5.293584 0.5964157
104: 2024-05-14 9.70 8.215896 1.4841037
105: 2024-05-15 6.00 7.685520 1.6855205
hour_15_data <- all_data[all_data$hour == 15, ]
hour_15_data <- hour_15_data[,-c(2)]
hour_15_data$trend_hour_15 <- 1:nrow(hour_15_data)
hour_15_data[, lag_15_production := shift(production,1)]
hour_15_data[,lag_15_diff:=production-lag_15_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_15_data <- hour_15_data[!is.na(lag_15_production)]
lm_hour_15 <- lm(production ~+lag_15_production+trend_hour_15+special_period+dswrf_surface+uswrf_top_of_atmosphere+dlwrf_surface+hourly_cloud_average+tmp_surface+is.weekend+is.ramadan+is.publicholiday +month*hourly_max_t , data = hour_15_data)
summary(lm_hour_15)
checkresiduals(lm_hour_15)
plot(lm_hour_15)
Call:
lm(formula = production ~ +lag_15_production + trend_hour_15 +
special_period + dswrf_surface + uswrf_top_of_atmosphere +
dlwrf_surface + hourly_cloud_average + tmp_surface + is.weekend +
is.ramadan + is.publicholiday + month * hourly_max_t, data = hour_15_data)
Residuals:
Min 1Q Median 3Q Max
-6.2016 -0.8674 0.0785 1.0296 6.1278
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.027e+01 2.290e+01 -1.322 0.186554
lag_15_production 1.126e-01 2.954e-02 3.812 0.000148 ***
trend_hour_15 -9.099e-04 3.005e-04 -3.028 0.002542 **
special_period 4.393e-01 2.064e-01 2.128 0.033592 *
dswrf_surface 2.794e-03 1.325e-03 2.108 0.035292 *
uswrf_top_of_atmosphere 9.910e-03 1.994e-03 4.970 8.15e-07 ***
dlwrf_surface -2.490e-02 5.054e-03 -4.926 1.01e-06 ***
hourly_cloud_average -2.727e-02 4.696e-03 -5.806 9.08e-09 ***
tmp_surface 1.325e-01 7.381e-02 1.795 0.073075 .
is.weekend 1.330e+00 6.940e-01 1.916 0.055651 .
is.ramadan 4.968e-01 2.717e-01 1.828 0.067875 .
is.publicholiday -1.619e+00 6.894e-01 -2.348 0.019120 *
monthAra 5.860e+00 2.896e+01 0.202 0.839681
monthEki 7.430e+00 2.907e+01 0.256 0.798293
monthEyl -1.439e+01 2.749e+01 -0.524 0.600686
monthHaz -5.492e+01 2.830e+01 -1.940 0.052663 .
monthKas 1.529e+01 2.573e+01 0.594 0.552418
monthMar -2.894e+00 2.435e+01 -0.119 0.905416
monthMay -9.822e+00 2.627e+01 -0.374 0.708613
monthNis -9.130e+00 2.481e+01 -0.368 0.712980
monthOca -4.429e+01 2.542e+01 -1.743 0.081788 .
monthŞub -1.830e+01 2.448e+01 -0.747 0.455068
monthTem -5.457e+00 2.763e+01 -0.197 0.843492
hourly_max_t NA NA NA NA
monthAra:hourly_max_t -2.336e-02 9.604e-02 -0.243 0.807928
monthEki:hourly_max_t -2.945e-02 9.430e-02 -0.312 0.754848
monthEyl:hourly_max_t 4.475e-02 8.781e-02 0.510 0.610475
monthHaz:hourly_max_t 1.811e-01 9.127e-02 1.984 0.047590 *
monthKas:hourly_max_t -5.652e-02 8.323e-02 -0.679 0.497289
monthMar:hourly_max_t 9.090e-03 7.828e-02 0.116 0.907586
monthMay:hourly_max_t 3.347e-02 8.465e-02 0.395 0.692666
monthNis:hourly_max_t 2.897e-02 7.948e-02 0.364 0.715603
monthOca:hourly_max_t 1.582e-01 8.305e-02 1.905 0.057099 .
monthŞub:hourly_max_t 6.563e-02 7.907e-02 0.830 0.406755
monthTem:hourly_max_t 1.707e-02 8.818e-02 0.194 0.846550
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.65 on 830 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6737, Adjusted R-squared: 0.6608
F-statistic: 51.94 on 33 and 830 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 38 data: Residuals LM test = 50.289, df = 38, p-value = 0.08757
ggplot(hour_15_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 15",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
# Filter the actual data for hour 15 and the specific date range
hour_15_data <- all_data %>%
filter(hour == 15, date >= start_date & date <= end_date)
# Filter the to_be_forecasted data for hour 15
forecast_data_hour_15 <- to_be_forecasted %>%
filter(hour == 15)
# Add the trend variable for hour 15
forecast_data_hour_15$trend_hour_15 <- nrow(hour_15_data) + 1:nrow(forecast_data_hour_15)
# Add the last known production value from hour_15_data to forecast_data_hour_15 for creating the lagged variable
last_production_value <- tail(hour_15_data$production, 1)
forecast_data_hour_15 <- forecast_data_hour_15 %>%
mutate(lag_15_production = last_production_value)
# Make predictions
predictions_hour_15 <- predict(lm_hour_15, newdata = forecast_data_hour_15)
predictions_hour_15[predictions_hour_15 < 0] <- 0
# Create a dataframe with predictions and corresponding dates
predictions_hour_15_df <- data.frame(
date = forecast_data_hour_15$date, # Assuming 'date' column exists in forecast_data_hour_15
predicted = predictions_hour_15
)
# Merge with actual values
merged_data <- merge(hour_15_data, predictions_hour_15_df, by = "date")
# Select and rename the columns to keep
output_data <- merged_data %>%
select(date, actual = production, predicted)
# Calculate WMAPE
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
# Print WMAPE
print(paste("WMAPE:", wmape, "%"))
# Ensure the directory exists
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
# Specify the file path
file_path <- file.path(output_dir, "predictions_hour_15_with_actuals.xlsx")
# Write the combined data to an Excel file
write_xlsx(output_data, file_path)
# Print the combined data
print(output_data)
[1] "WMAPE: 31.6315801980676 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 4.78 4.321723 0.4582772
2: 2024-02-02 5.64 4.932761 0.7072392
3: 2024-02-03 3.24 2.428159 0.8118405
4: 2024-02-04 3.71 2.997038 0.7129616
5: 2024-02-05 1.88 2.282787 0.4027869
---
101: 2024-05-11 1.24 5.956781 4.7167814
102: 2024-05-12 0.00 4.214975 4.2149754
103: 2024-05-13 5.32 4.455304 0.8646957
104: 2024-05-14 7.82 6.907217 0.9127826
105: 2024-05-15 3.11 6.406795 3.2967952
hour_16_data <- all_data[all_data$hour == 16, ]
hour_16_data <- hour_16_data[,-c(2)]
hour_16_data$trend_hour_16 <- 1:nrow(hour_16_data)
hour_16_data[, lag_16_production := shift(production,1)]
hour_16_data[,lag_16_diff:=production-lag_16_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_16_data <- hour_16_data[!is.na(lag_16_production)]
lm_hour_16 <- lm(production ~+lag_16_production+trend_hour_16+ special_period+dswrf_surface+uswrf_top_of_atmosphere+hourly_cloud_average+is.ramadan+month*hourly_max_t, data = hour_16_data)
summary(lm_hour_16)
checkresiduals(lm_hour_16)
plot(lm_hour_16)
Call:
lm(formula = production ~ +lag_16_production + trend_hour_16 +
special_period + dswrf_surface + uswrf_top_of_atmosphere +
hourly_cloud_average + is.ramadan + month * hourly_max_t,
data = hour_16_data)
Residuals:
Min 1Q Median 3Q Max
-6.2282 -0.6438 -0.0160 0.5996 8.0625
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.935e+00 1.834e+01 0.160 0.872886
lag_16_production 2.737e-01 3.176e-02 8.616 < 2e-16 ***
trend_hour_16 -8.723e-04 2.400e-04 -3.635 0.000295 ***
special_period 5.022e-01 1.667e-01 3.012 0.002674 **
dswrf_surface 5.930e-03 1.040e-03 5.704 1.62e-08 ***
uswrf_top_of_atmosphere 4.940e-03 1.611e-03 3.066 0.002240 **
hourly_cloud_average -7.908e-03 3.141e-03 -2.517 0.012012 *
is.ramadan 5.014e-01 2.192e-01 2.287 0.022432 *
monthAra -1.212e+01 2.413e+01 -0.502 0.615601
monthEki 1.546e+01 2.343e+01 0.660 0.509576
monthEyl -1.404e+01 2.254e+01 -0.623 0.533494
monthHaz -3.036e+01 2.231e+01 -1.361 0.173812
monthKas 1.182e+01 2.110e+01 0.560 0.575421
monthMar -1.229e+01 1.938e+01 -0.634 0.526199
monthMay -2.709e+00 2.052e+01 -0.132 0.894991
monthNis -1.475e+01 1.961e+01 -0.752 0.452165
monthOca -3.380e+01 2.039e+01 -1.658 0.097778 .
monthŞub -2.585e+01 1.950e+01 -1.325 0.185439
monthTem -2.035e+01 2.256e+01 -0.902 0.367252
hourly_max_t -1.297e-02 5.896e-02 -0.220 0.825912
monthAra:hourly_max_t 4.147e-02 8.127e-02 0.510 0.609986
monthEki:hourly_max_t -5.615e-02 7.697e-02 -0.730 0.465899
monthEyl:hourly_max_t 4.373e-02 7.291e-02 0.600 0.548823
monthHaz:hourly_max_t 9.828e-02 7.265e-02 1.353 0.176480
monthKas:hourly_max_t -4.340e-02 6.926e-02 -0.627 0.531106
monthMar:hourly_max_t 3.834e-02 6.301e-02 0.608 0.543040
monthMay:hourly_max_t 7.239e-03 6.681e-02 0.108 0.913744
monthNis:hourly_max_t 4.693e-02 6.348e-02 0.739 0.459948
monthOca:hourly_max_t 1.189e-01 6.741e-02 1.764 0.078062 .
monthŞub:hourly_max_t 8.877e-02 6.368e-02 1.394 0.163703
monthTem:hourly_max_t 6.380e-02 7.276e-02 0.877 0.380820
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.33 on 834 degrees of freedom
Multiple R-squared: 0.6828, Adjusted R-squared: 0.6714
F-statistic: 59.84 on 30 and 834 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 34 data: Residuals LM test = 76.913, df = 34, p-value = 3.623e-05
ggplot(hour_16_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 16",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
# Filter the actual data for hour 16 and the specific date range
hour_16_data <- all_data %>%
filter(hour == 16, date >= start_date & date <= end_date)
# Filter the to_be_forecasted data for hour 16
forecast_data_hour_16 <- to_be_forecasted %>%
filter(hour == 16)
# Add the trend variable for hour 16
forecast_data_hour_16$trend_hour_16 <- nrow(hour_16_data) + 1:nrow(forecast_data_hour_16)
# Add the last known production value from hour_16_data to forecast_data_hour_16 for creating the lagged variable
last_production_value <- tail(hour_16_data$production, 1)
forecast_data_hour_16 <- forecast_data_hour_16 %>%
mutate(lag_16_production = last_production_value)
# Make predictions
predictions_hour_16 <- predict(lm_hour_16, newdata = forecast_data_hour_16)
predictions_hour_16[predictions_hour_16 < 0] <- 0
# Create a dataframe with predictions and corresponding dates
predictions_hour_16_df <- data.frame(
date = forecast_data_hour_16$date, # Assuming 'date' column exists in forecast_data_hour_16
predicted = predictions_hour_16
)
# Merge with actual values
merged_data <- merge(hour_16_data, predictions_hour_16_df, by = "date")
# Select and rename the columns to keep
output_data <- merged_data %>%
select(date, actual = production, predicted)
# Calculate WMAPE
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
# Print WMAPE
print(paste("WMAPE:", wmape, "%"))
# Ensure the directory exists
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
# Specify the file path
file_path <- file.path(output_dir, "predictions_hour_16_with_actuals.xlsx")
# Write the combined data to an Excel file
write_xlsx(output_data, file_path)
# Print the combined data
print(output_data)
[1] "WMAPE: 39.6096503905389 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.53 0.9404273 0.4104273
2: 2024-02-02 1.53 1.1416612 0.3883388
3: 2024-02-03 0.94 0.3487044 0.5912956
4: 2024-02-04 0.66 0.5928544 0.0671456
5: 2024-02-05 0.00 0.4192077 0.4192077
---
101: 2024-05-11 0.78 2.9014597 2.1214597
102: 2024-05-12 0.00 2.3909400 2.3909400
103: 2024-05-13 3.62 2.3220644 1.2979356
104: 2024-05-14 4.55 3.7036654 0.8463346
105: 2024-05-15 0.00 3.0294696 3.0294696
hour_17_data <- all_data[all_data$hour == 17, ]
hour_17_data <- hour_17_data[,-c(2)]
hour_17_data$trend_hour_17 <- 1:nrow(hour_17_data)
hour_17_data$nm <- as.numeric(format(hour_17_data$date, "%m"))
# Assuming your date column is named 'date_column'
hour_17_data$is_not_winter <- as.numeric(!hour_17_data$nm %in% c(12, 1, 2))
hour_17_data <- as.data.table(hour_17_data)
#hour_17_data <- hour_17_data[production > 0]
#hour_17_data[, log_production := log(production)]
hour_17_data[, lag_17_production := shift(production,1)]
hour_17_data[,lag_17_diff:=production-lag_17_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_17_data <- hour_17_data[!is.na(lag_17_production)]
hour_17_data <- hour_17_data[!is.na(lag_17_diff)]
lm_hour_17 <- lm(production ~+lag_17_production+trend_hour_17+special_period+is.ramadan+is.religousday+uswrf_surface+uswrf_top_of_atmosphere+dswrf_surface+month*hourly_max_t, data = hour_17_data)
summary(lm_hour_17)
checkresiduals(lm_hour_17)
plot(lm_hour_17)
Call:
lm(formula = production ~ +lag_17_production + trend_hour_17 +
special_period + is.ramadan + is.religousday + uswrf_surface +
uswrf_top_of_atmosphere + dswrf_surface + month * hourly_max_t,
data = hour_17_data)
Residuals:
Min 1Q Median 3Q Max
-3.2207 -0.2810 -0.0078 0.1988 4.0676
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.0813716 11.2734156 -0.185 0.8536
lag_17_production 0.4986512 0.0302889 16.463 < 2e-16 ***
trend_hour_17 -0.0007052 0.0001477 -4.774 2.13e-06 ***
special_period 0.2365340 0.0964244 2.453 0.0144 *
is.ramadan 0.2787440 0.1268847 2.197 0.0283 *
is.religousday -0.1523958 0.1673489 -0.911 0.3627
uswrf_surface -0.0044092 0.0026311 -1.676 0.0941 .
uswrf_top_of_atmosphere 0.0012367 0.0011655 1.061 0.2890
dswrf_surface 0.0030840 0.0012708 2.427 0.0154 *
monthAra -0.2137688 14.5499366 -0.015 0.9883
monthEki 2.5256493 14.4314141 0.175 0.8611
monthEyl 8.8020038 13.7692349 0.639 0.5228
monthHaz 11.8663195 13.8225292 0.858 0.3909
monthKas 6.1699317 13.0493808 0.473 0.6365
monthMar 7.6478407 12.1969094 0.627 0.5308
monthMay 11.6937865 12.8830519 0.908 0.3643
monthNis -0.2445660 12.1437403 -0.020 0.9839
monthOca -1.0584106 12.3161696 -0.086 0.9315
monthŞub 5.8132312 12.3323010 0.471 0.6375
monthTem -8.7422014 14.4185303 -0.606 0.5445
hourly_max_t 0.0062948 0.0369846 0.170 0.8649
monthAra:hourly_max_t 0.0017844 0.0495986 0.036 0.9713
monthEki:hourly_max_t -0.0097040 0.0480400 -0.202 0.8400
monthEyl:hourly_max_t -0.0308600 0.0451584 -0.683 0.4946
monthHaz:hourly_max_t -0.0409155 0.0454998 -0.899 0.3688
monthKas:hourly_max_t -0.0215024 0.0435056 -0.494 0.6213
monthMar:hourly_max_t -0.0283603 0.0403228 -0.703 0.4820
monthMay:hourly_max_t -0.0400826 0.0424810 -0.944 0.3457
monthNis:hourly_max_t -0.0001357 0.0398308 -0.003 0.9973
monthOca:hourly_max_t 0.0042546 0.0410731 0.104 0.9175
monthŞub:hourly_max_t -0.0211684 0.0409375 -0.517 0.6052
monthTem:hourly_max_t 0.0273820 0.0470553 0.582 0.5608
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7635 on 833 degrees of freedom
Multiple R-squared: 0.6759, Adjusted R-squared: 0.6638
F-statistic: 56.03 on 31 and 833 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 35 data: Residuals LM test = 147.01, df = 35, p-value = 1.113e-15
ggplot(hour_17_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 17",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
Log transformation for hour 17 was considered. However, we have decided not to use it since it requires elimination of zero values and disrupts continuity of our data.
# Filter the actual data for hour 17 and the specific date range
hour_17_data <- all_data %>%
filter(hour == 17, date >= start_date & date <= end_date)
# Filter the to_be_forecasted data for hour 17
forecast_data_hour_17 <- to_be_forecasted %>%
filter(hour == 17)
# Add the trend variable for hour 17
forecast_data_hour_17$trend_hour_17 <- nrow(hour_17_data) + 1:nrow(forecast_data_hour_17)
# Add the last known production value from hour_17_data to forecast_data_hour_17 for creating the lagged variable
last_production_value <- tail(hour_17_data$production, 1)
forecast_data_hour_17 <- forecast_data_hour_17 %>%
mutate(lag_17_production = last_production_value)
# Make predictions
predictions_hour_17 <- predict(lm_hour_17, newdata = forecast_data_hour_17)
predictions_hour_17[predictions_hour_17 < 0] <- 0
# Create a dataframe with predictions and corresponding dates
predictions_hour_17_df <- data.frame(
date = forecast_data_hour_17$date, # Assuming 'date' column exists in forecast_data_hour_17
predicted = predictions_hour_17
)
# Merge with actual values
merged_data <- merge(hour_17_data, predictions_hour_17_df, by = "date")
# Select and rename the columns to keep
output_data <- merged_data %>%
select(date, actual = production, predicted)
# Calculate WMAPE
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
# Print WMAPE
print(paste("WMAPE:", wmape, "%"))
# Ensure the directory exists
output_dir <- "C:/Users/ecemozturk/Desktop"
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
# Specify the file path
file_path <- file.path(output_dir, "predictions_hour_17_with_actuals.xlsx")
# Write the combined data to an Excel file
write_xlsx(output_data, file_path)
# Print the combined data
print(output_data)
[1] "WMAPE: 61.689983094304 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.00 0.3053737 0.3053737
2: 2024-02-02 0.00 0.3176936 0.3176936
3: 2024-02-03 0.00 0.2818699 0.2818699
4: 2024-02-04 0.00 0.2827902 0.2827902
5: 2024-02-05 0.00 0.1942501 0.1942501
---
101: 2024-05-11 0.30 1.0700304 0.7700304
102: 2024-05-12 0.00 1.0176547 1.0176547
103: 2024-05-13 1.30 1.0150746 0.2849254
104: 2024-05-14 1.33 1.3800042 0.0500042
105: 2024-05-15 0.00 1.1531298 1.1531298
hour_18_data <- all_data[all_data$hour == 18, ]
hour_18_data <- hour_18_data[,-c(2)]
# Assuming your dataframe is named 'data' and the production column is named 'production'
#data_18_filtered <- hour_18_data[hour_18_data$production != 0, ]
hour_18_data$trend_hour_18 <- 1:nrow(hour_18_data)
hour_18_data[, lag_18_production := shift(production,1)]
hour_18_data[,lag_18_diff:=production-lag_18_production]
# Remove rows with NA in lagged production to ensure the model can run
hour_18_data <- hour_18_data[!is.na(lag_18_production)]
lm_hour_18 <- lm(production ~+lag_18_production +trend_hour_18+special_period+tmp_surface+dswrf_surface+month*hourly_max_t , data = hour_18_data)
summary(lm_hour_18)
checkresiduals(lm_hour_18)
plot(lm_hour_18)
Call:
lm(formula = production ~ +lag_18_production + trend_hour_18 +
special_period + tmp_surface + dswrf_surface + month * hourly_max_t,
data = hour_18_data)
Residuals:
Min 1Q Median 3Q Max
-0.6744 -0.0467 -0.0040 0.0102 3.7526
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.370e+01 4.342e+00 -3.155 0.00166 **
lag_18_production 8.499e-02 3.446e-02 2.466 0.01385 *
trend_hour_18 2.837e-05 4.623e-05 0.614 0.53957
special_period 1.015e-01 3.311e-02 3.064 0.00226 **
tmp_surface 4.579e-02 1.437e-02 3.186 0.00149 **
dswrf_surface 2.093e-05 1.291e-04 0.162 0.87126
monthAra 1.374e+01 5.274e+00 2.606 0.00933 **
monthEki 1.297e+01 5.486e+00 2.363 0.01834 *
monthEyl 1.386e+01 5.222e+00 2.654 0.00811 **
monthHaz 1.621e+01 5.421e+00 2.991 0.00287 **
monthKas 1.366e+01 5.106e+00 2.676 0.00760 **
monthMar 1.392e+01 4.540e+00 3.066 0.00224 **
monthMay 1.224e+01 5.012e+00 2.442 0.01481 *
monthNis 1.345e+01 4.690e+00 2.869 0.00422 **
monthOca 1.389e+01 4.602e+00 3.018 0.00262 **
monthŞub 1.385e+01 4.524e+00 3.062 0.00227 **
monthTem 7.246e+00 5.681e+00 1.275 0.20254
hourly_max_t NA NA NA NA
monthAra:hourly_max_t -4.600e-02 1.800e-02 -2.555 0.01079 *
monthEki:hourly_max_t -4.346e-02 1.853e-02 -2.345 0.01924 *
monthEyl:hourly_max_t -4.656e-02 1.739e-02 -2.677 0.00758 **
monthHaz:hourly_max_t -5.379e-02 1.810e-02 -2.972 0.00304 **
monthKas:hourly_max_t -4.581e-02 1.727e-02 -2.653 0.00813 **
monthMar:hourly_max_t -4.664e-02 1.511e-02 -3.086 0.00209 **
monthMay:hourly_max_t -4.056e-02 1.676e-02 -2.420 0.01574 *
monthNis:hourly_max_t -4.498e-02 1.561e-02 -2.881 0.00407 **
monthOca:hourly_max_t -4.653e-02 1.541e-02 -3.019 0.00261 **
monthŞub:hourly_max_t -4.641e-02 1.509e-02 -3.076 0.00217 **
monthTem:hourly_max_t -2.342e-02 1.884e-02 -1.243 0.21417
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.2659 on 837 degrees of freedom
Multiple R-squared: 0.1977, Adjusted R-squared: 0.1719
F-statistic: 7.64 on 27 and 837 DF, p-value: < 2.2e-16
Breusch-Godfrey test for serial correlation of order up to 32 data: Residuals LM test = 207.6, df = 32, p-value < 2.2e-16
ggplot(hour_18_data, aes(x = date, y = production)) +
geom_line() +
labs(title = "Hourly Production Data for Hour 18",
x = "Date",
y = "Production") +
theme_minimal()
Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <c5>” Warning message in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, : “conversion failure on 'Şub' in 'mbcsToSbcs': dot substituted for <9e>”
# Filter the actual data for hour 18 and the specific date range
hour_18_data <- all_data %>%
filter(hour == 18, date >= start_date & date <= end_date)
# Filter the to_be_forecasted data for hour 18
forecast_data_hour_18 <- to_be_forecasted %>%
filter(hour == 18)
# Add the trend variable for hour 18
forecast_data_hour_18$trend_hour_18 <- nrow(hour_18_data) + 1:nrow(forecast_data_hour_18)
# Add the last known production value from hour_18_data to forecast_data_hour_18 for creating the lagged variable
last_production_value <- tail(hour_18_data$production, 1)
forecast_data_hour_18 <- forecast_data_hour_18 %>%
mutate(lag_18_production = last_production_value)
# Make predictions
predictions_hour_18 <- predict(lm_hour_18, newdata = forecast_data_hour_18)
predictions_hour_18[predictions_hour_18 < 0] <- 0
# Create a dataframe with predictions and corresponding dates
predictions_hour_18_df <- data.frame(
date = forecast_data_hour_18$date, # Assuming 'date' column exists in forecast_data_hour_18
predicted = predictions_hour_18
)
# Merge with actual values
merged_data <- merge(hour_18_data, predictions_hour_18_df, by = "date")
# Select and rename the columns to keep
output_data <- merged_data %>%
select(date, actual = production, predicted)
# Calculate WMAPE
output_data <- output_data %>%
mutate(error = abs(predicted - actual))
wmape <- sum(output_data$error) / sum(output_data$actual) * 100
# Print WMAPE
print(paste("WMAPE:", wmape, "%"))
# Print the combined data
print(output_data)
[1] "WMAPE: 87.6958599729125 %"
Key: <date>
date actual predicted error
<IDat> <num> <num> <num>
1: 2024-02-01 0.00 0.00000000 0.00000000
2: 2024-02-02 0.00 0.00000000 0.00000000
3: 2024-02-03 0.00 0.00000000 0.00000000
4: 2024-02-04 0.00 0.00000000 0.00000000
5: 2024-02-05 0.00 0.00000000 0.00000000
---
101: 2024-05-11 0.00 0.05853033 0.05853033
102: 2024-05-12 0.00 0.04082760 0.04082760
103: 2024-05-13 0.16 0.04495209 0.11504791
104: 2024-05-14 0.14 0.05715345 0.08284655
105: 2024-05-15 0.00 0.06877722 0.06877722
Conclusion for Linear Regression models: Especially for the hours in between 7 and 16, linear regression models and WMAPE values performs better than other hours mainly depending on the sun-set & sun-rise period. Although we tried to fit the production into a linear regression model, we still observe high WMAPE values in hours 4,5,6,17 and 18 because of having limited amount of data.